[mesa.git] / src / compiler / nir / nir_linking_helpers.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28
29 /* This file contains various little helpers for doing simple linking in
30 * NIR. Eventually, we'll probably want a full-blown varying packing
31 * implementation in here. Right now, it just deletes unused things.
32 */
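
/* As a rough, illustrative sketch (not a prescribed order): a linker that has
 * two adjacent-stage shaders, here called "producer" and "consumer" and
 * assumed to already have scalar I/O, might drive the helpers in this file
 * along these lines:
 *
 *    nir_link_xfb_varyings(producer, consumer);
 *    bool progress = nir_link_opt_varyings(producer, consumer);
 *    progress |= nir_remove_unused_varyings(producer, consumer);
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 *
 * nir_compact_varyings() expects to run more-or-less directly after
 * nir_remove_unused_varyings() (see the comment above it), and callers
 * typically follow up with dead-code elimination so that demoted temporaries
 * actually disappear.
 */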
33
34 /**
35 * Returns the bits in the inputs_read, outputs_written, or
36 * system_values_read bitfield corresponding to this variable.
37 */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41 if (var->data.location < 0)
42 return 0;
43
44 unsigned location = var->data.patch ?
45 var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46
47 assert(var->data.mode == nir_var_shader_in ||
48 var->data.mode == nir_var_shader_out ||
49 var->data.mode == nir_var_system_value);
50 assert(var->data.location >= 0);
51
52 const struct glsl_type *type = var->type;
53 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
54 assert(glsl_type_is_array(type));
55 type = glsl_get_array_element(type);
56 }
57
58 unsigned slots = glsl_count_attribute_slots(type, false);
59 return ((1ull << slots) - 1) << location;
60 }
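
/* For example (illustrative, non-patch case): a float or vec4 output at
 * VARYING_SLOT_VAR1 occupies one attribute slot, so this returns
 * 1ull << VARYING_SLOT_VAR1; a dvec4 at the same location counts as two
 * slots and yields 0x3ull << VARYING_SLOT_VAR1.
 */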
61
62 static uint8_t
63 get_num_components(nir_variable *var)
64 {
65 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
66 return 4;
67
68 return glsl_get_vector_elements(glsl_without_array(var->type));
69 }
70
71 static void
72 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
73 {
74 nir_foreach_function(function, shader) {
75 if (!function->impl)
76 continue;
77
78 nir_foreach_block(block, function->impl) {
79 nir_foreach_instr(instr, block) {
80 if (instr->type != nir_instr_type_intrinsic)
81 continue;
82
83 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
84 if (intrin->intrinsic != nir_intrinsic_load_deref)
85 continue;
86
87 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
88 if (deref->mode != nir_var_shader_out)
89 continue;
90
91 nir_variable *var = nir_deref_instr_get_variable(deref);
92 for (unsigned i = 0; i < get_num_components(var); i++) {
93 if (var->data.patch) {
94 patches_read[var->data.location_frac + i] |=
95 get_variable_io_mask(var, shader->info.stage);
96 } else {
97 read[var->data.location_frac + i] |=
98 get_variable_io_mask(var, shader->info.stage);
99 }
100 }
101 }
102 }
103 }
104 }
105
106 /**
107 * Helper for removing unused shader I/O variables, by demoting them to global
108 * variables (which may then be dead-code eliminated).
109 *
110 * Example usage is:
111 *
112 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
113 * read, patches_read) ||
114 * progress;
115 *
116 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
117 * representing each .location_frac used. Note that for vector variables,
118 * only the first channel (.location_frac) is examined for deciding if the
119 * variable is used!
120 */
121 bool
122 nir_remove_unused_io_vars(nir_shader *shader,
123 nir_variable_mode mode,
124 uint64_t *used_by_other_stage,
125 uint64_t *used_by_other_stage_patches)
126 {
127 bool progress = false;
128 uint64_t *used;
129
130 assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
131
132 nir_foreach_variable_with_modes_safe(var, shader, mode) {
133 if (var->data.patch)
134 used = used_by_other_stage_patches;
135 else
136 used = used_by_other_stage;
137
138 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
139 continue;
140
141 if (var->data.always_active_io)
142 continue;
143
144 if (var->data.explicit_xfb_buffer)
145 continue;
146
147 uint64_t other_stage = used[var->data.location_frac];
148
149 if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
150 /* This one is invalid, make it a global variable instead */
151 var->data.location = 0;
152 var->data.mode = nir_var_shader_temp;
153
154 progress = true;
155 }
156 }
157
158 if (progress)
159 nir_fixup_deref_modes(shader);
160
161 return progress;
162 }
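
/* A hedged example of calling this directly: a driver that knows nothing
 * downstream reads the producer can demote every eligible user varying by
 * passing all-zero "used" masks (builtins, always_active_io and explicit
 * xfb outputs are still skipped):
 *
 *    uint64_t none[4] = { 0 };
 *    uint64_t patches_none[4] = { 0 };
 *    nir_remove_unused_io_vars(producer, nir_var_shader_out,
 *                              none, patches_none);
 */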
163
164 bool
165 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
166 {
167 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
168 assert(consumer->info.stage != MESA_SHADER_VERTEX);
169
170 uint64_t read[4] = { 0 }, written[4] = { 0 };
171 uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
172
173 nir_foreach_shader_out_variable(var, producer) {
174 for (unsigned i = 0; i < get_num_components(var); i++) {
175 if (var->data.patch) {
176 patches_written[var->data.location_frac + i] |=
177 get_variable_io_mask(var, producer->info.stage);
178 } else {
179 written[var->data.location_frac + i] |=
180 get_variable_io_mask(var, producer->info.stage);
181 }
182 }
183 }
184
185 nir_foreach_shader_in_variable(var, consumer) {
186 for (unsigned i = 0; i < get_num_components(var); i++) {
187 if (var->data.patch) {
188 patches_read[var->data.location_frac + i] |=
189 get_variable_io_mask(var, consumer->info.stage);
190 } else {
191 read[var->data.location_frac + i] |=
192 get_variable_io_mask(var, consumer->info.stage);
193 }
194 }
195 }
196
197 /* Each TCS invocation can read data written by other TCS invocations,
198 * so even if the outputs are not used by the TES we must also make
199 * sure they are not read by the TCS before demoting them to globals.
200 */
201 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
202 tcs_add_output_reads(producer, read, patches_read);
203
204 bool progress = false;
205 progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
206 patches_read);
207
208 progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
209 patches_written) || progress;
210
211 return progress;
212 }
213
214 static uint8_t
215 get_interp_type(nir_variable *var, const struct glsl_type *type,
216 bool default_to_smooth_interp)
217 {
218 if (glsl_type_is_integer(type))
219 return INTERP_MODE_FLAT;
220 else if (var->data.interpolation != INTERP_MODE_NONE)
221 return var->data.interpolation;
222 else if (default_to_smooth_interp)
223 return INTERP_MODE_SMOOTH;
224 else
225 return INTERP_MODE_NONE;
226 }
227
228 #define INTERPOLATE_LOC_SAMPLE 0
229 #define INTERPOLATE_LOC_CENTROID 1
230 #define INTERPOLATE_LOC_CENTER 2
231
232 static uint8_t
233 get_interp_loc(nir_variable *var)
234 {
235 if (var->data.sample)
236 return INTERPOLATE_LOC_SAMPLE;
237 else if (var->data.centroid)
238 return INTERPOLATE_LOC_CENTROID;
239 else
240 return INTERPOLATE_LOC_CENTER;
241 }
242
243 static bool
244 is_packing_supported_for_type(const struct glsl_type *type)
245 {
246 /* We ignore complex types such as arrays, matrices, structs and bit sizes
247 * other than 32 bits. All other vector types should have been split into
248 * scalar variables by the lower_io_to_scalar pass. The only exception
249 * should be OpenGL xfb varyings.
250 * TODO: add support for more complex types?
251 */
252 return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
253 }
254
255 struct assigned_comps
256 {
257 uint8_t comps;
258 uint8_t interp_type;
259 uint8_t interp_loc;
260 bool is_32bit;
261 };
262
263 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
264 * algorithms this function just assigns them their existing locations for now.
265 * TODO: allow better packing of complex types.
266 */
267 static void
268 get_unmoveable_components_masks(nir_shader *shader,
269 nir_variable_mode mode,
270 struct assigned_comps *comps,
271 gl_shader_stage stage,
272 bool default_to_smooth_interp)
273 {
274 nir_foreach_variable_with_modes_safe(var, shader, mode) {
275 assert(var->data.location >= 0);
276
277 /* Only remap things that aren't built-ins. */
278 if (var->data.location >= VARYING_SLOT_VAR0 &&
279 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
280
281 const struct glsl_type *type = var->type;
282 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
283 assert(glsl_type_is_array(type));
284 type = glsl_get_array_element(type);
285 }
286
287 /* If we can pack this varying then don't mark the components as
288 * used.
289 */
290 if (is_packing_supported_for_type(type))
291 continue;
292
293 unsigned location = var->data.location - VARYING_SLOT_VAR0;
294
295 unsigned elements =
296 glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
297 glsl_get_vector_elements(glsl_without_array(type)) : 4;
298
299 bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
300 unsigned slots = glsl_count_attribute_slots(type, false);
301 unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
302 unsigned comps_slot2 = 0;
303 for (unsigned i = 0; i < slots; i++) {
304 if (dual_slot) {
305 if (i & 1) {
306 comps[location + i].comps |= ((1 << comps_slot2) - 1);
307 } else {
308 unsigned num_comps = 4 - var->data.location_frac;
309 comps_slot2 = (elements * dmul) - num_comps;
310
311 /* Assume ARB_enhanced_layouts packing rules for doubles */
312 assert(var->data.location_frac == 0 ||
313 var->data.location_frac == 2);
314 assert(comps_slot2 <= 4);
315
316 comps[location + i].comps |=
317 ((1 << num_comps) - 1) << var->data.location_frac;
318 }
319 } else {
320 comps[location + i].comps |=
321 ((1 << (elements * dmul)) - 1) << var->data.location_frac;
322 }
323
324 comps[location + i].interp_type =
325 get_interp_type(var, type, default_to_smooth_interp);
326 comps[location + i].interp_loc = get_interp_loc(var);
327 comps[location + i].is_32bit =
328 glsl_type_is_32bit(glsl_without_array(type));
329 }
330 }
331 }
332 }
333
334 struct varying_loc
335 {
336 uint8_t component;
337 uint32_t location;
338 };
339
340 static void
341 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
342 uint64_t slots_used_mask, unsigned num_slots)
343 {
344 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
345
346 slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
347 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
348 }
349
350 static void
351 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
352 {
353 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
354
355 slots_used[var->data.patch ? 1 : 0] |=
356 BITFIELD64_BIT(var->data.location - loc_offset + offset);
357 }
358
359 static void
360 remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
361 struct varying_loc (*remap)[4],
362 uint64_t *slots_used, uint64_t *out_slots_read,
363 uint32_t *p_slots_used, uint32_t *p_out_slots_read)
364 {
365 const gl_shader_stage stage = shader->info.stage;
366 uint64_t out_slots_read_tmp[2] = {0};
367 uint64_t slots_used_tmp[2] = {0};
368
369 /* We don't touch builtins so just copy the bitmask */
370 slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
371
372 nir_foreach_variable_with_modes(var, shader, mode) {
373 assert(var->data.location >= 0);
374
375 /* Only remap things that aren't built-ins */
376 if (var->data.location >= VARYING_SLOT_VAR0 &&
377 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
378
379 const struct glsl_type *type = var->type;
380 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
381 assert(glsl_type_is_array(type));
382 type = glsl_get_array_element(type);
383 }
384
385 unsigned num_slots = glsl_count_attribute_slots(type, false);
386 bool used_across_stages = false;
387 bool outputs_read = false;
388
389 unsigned location = var->data.location - VARYING_SLOT_VAR0;
390 struct varying_loc *new_loc = &remap[location][var->data.location_frac];
391
392 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
393 uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
394 uint64_t outs_used =
395 var->data.patch ? *p_out_slots_read : *out_slots_read;
396 uint64_t slots =
397 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
398
399 if (slots & used)
400 used_across_stages = true;
401
402 if (slots & outs_used)
403 outputs_read = true;
404
405 if (new_loc->location) {
406 var->data.location = new_loc->location;
407 var->data.location_frac = new_loc->component;
408 }
409
410 if (var->data.always_active_io) {
411 /* We can't apply link-time optimisations (specifically array
412 * splitting) to these, so we need to copy the existing mask;
413 * otherwise we will mess up the mask for things like partially
414 * marked arrays.
415 */
416 if (used_across_stages)
417 mark_all_used_slots(var, slots_used_tmp, used, num_slots);
418
419 if (outputs_read) {
420 mark_all_used_slots(var, out_slots_read_tmp, outs_used,
421 num_slots);
422 }
423 } else {
424 for (unsigned i = 0; i < num_slots; i++) {
425 if (used_across_stages)
426 mark_used_slot(var, slots_used_tmp, i);
427
428 if (outputs_read)
429 mark_used_slot(var, out_slots_read_tmp, i);
430 }
431 }
432 }
433 }
434
435 *slots_used = slots_used_tmp[0];
436 *out_slots_read = out_slots_read_tmp[0];
437 *p_slots_used = slots_used_tmp[1];
438 *p_out_slots_read = out_slots_read_tmp[1];
439 }
440
441 struct varying_component {
442 nir_variable *var;
443 uint8_t interp_type;
444 uint8_t interp_loc;
445 bool is_32bit;
446 bool is_patch;
447 bool is_intra_stage_only;
448 bool initialised;
449 };
450
451 static int
452 cmp_varying_component(const void *comp1_v, const void *comp2_v)
453 {
454 struct varying_component *comp1 = (struct varying_component *) comp1_v;
455 struct varying_component *comp2 = (struct varying_component *) comp2_v;
456
457 /* We want patches to be ordered at the end of the array */
458 if (comp1->is_patch != comp2->is_patch)
459 return comp1->is_patch ? 1 : -1;
460
461 /* We want to try to group together TCS outputs that are only read by other
462 * TCS invocations and not consumed by the following stage.
463 */
464 if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
465 return comp1->is_intra_stage_only ? 1 : -1;
466
467 /* We can only pack varyings with matching interpolation types so group
468 * them together.
469 */
470 if (comp1->interp_type != comp2->interp_type)
471 return comp1->interp_type - comp2->interp_type;
472
473 /* Interpolation loc must match also. */
474 if (comp1->interp_loc != comp2->interp_loc)
475 return comp1->interp_loc - comp2->interp_loc;
476
477 /* If everything else matches just use the original location to sort */
478 return comp1->var->data.location - comp2->var->data.location;
479 }
480
481 static void
482 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
483 struct varying_component **varying_comp_info,
484 unsigned *varying_comp_info_size,
485 bool default_to_smooth_interp)
486 {
487 unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
488 unsigned num_of_comps_to_pack = 0;
489
490 /* Count the number of varyings that can be packed and create a mapping
491 * of those varyings to the array we will pass to qsort.
492 */
493 nir_foreach_shader_out_variable(var, producer) {
494
495 /* Only remap things that aren't builtins. */
496 if (var->data.location >= VARYING_SLOT_VAR0 &&
497 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
498
499 /* We can't repack xfb varyings. */
500 if (var->data.always_active_io)
501 continue;
502
503 const struct glsl_type *type = var->type;
504 if (nir_is_per_vertex_io(var, producer->info.stage) || var->data.per_view) {
505 assert(glsl_type_is_array(type));
506 type = glsl_get_array_element(type);
507 }
508
509 if (!is_packing_supported_for_type(type))
510 continue;
511
512 unsigned loc = var->data.location - VARYING_SLOT_VAR0;
513 store_varying_info_idx[loc][var->data.location_frac] =
514 ++num_of_comps_to_pack;
515 }
516 }
517
518 *varying_comp_info_size = num_of_comps_to_pack;
519 *varying_comp_info = rzalloc_array(NULL, struct varying_component,
520 num_of_comps_to_pack);
521
522 nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
523
524 /* Walk over the shader and populate the varying component info array */
525 nir_foreach_block(block, impl) {
526 nir_foreach_instr(instr, block) {
527 if (instr->type != nir_instr_type_intrinsic)
528 continue;
529
530 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
531 if (intr->intrinsic != nir_intrinsic_load_deref &&
532 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
533 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
534 intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
535 intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
536 continue;
537
538 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
539 if (deref->mode != nir_var_shader_in)
540 continue;
541
542 /* We only remap things that aren't builtins. */
543 nir_variable *in_var = nir_deref_instr_get_variable(deref);
544 if (in_var->data.location < VARYING_SLOT_VAR0)
545 continue;
546
547 unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
548 if (location >= MAX_VARYINGS_INCL_PATCH)
549 continue;
550
551 unsigned var_info_idx =
552 store_varying_info_idx[location][in_var->data.location_frac];
553 if (!var_info_idx)
554 continue;
555
556 struct varying_component *vc_info =
557 &(*varying_comp_info)[var_info_idx-1];
558
559 if (!vc_info->initialised) {
560 const struct glsl_type *type = in_var->type;
561 if (nir_is_per_vertex_io(in_var, consumer->info.stage) ||
562 in_var->data.per_view) {
563 assert(glsl_type_is_array(type));
564 type = glsl_get_array_element(type);
565 }
566
567 vc_info->var = in_var;
568 vc_info->interp_type =
569 get_interp_type(in_var, type, default_to_smooth_interp);
570 vc_info->interp_loc = get_interp_loc(in_var);
571 vc_info->is_32bit = glsl_type_is_32bit(type);
572 vc_info->is_patch = in_var->data.patch;
573 vc_info->is_intra_stage_only = false;
574 vc_info->initialised = true;
575 }
576 }
577 }
578
579 /* Walk over the shader and populate the varying component info array
580 * for varyings which are read by other TCS invocations but are not consumed
581 * by the TES.
582 */
583 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
584 impl = nir_shader_get_entrypoint(producer);
585
586 nir_foreach_block(block, impl) {
587 nir_foreach_instr(instr, block) {
588 if (instr->type != nir_instr_type_intrinsic)
589 continue;
590
591 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
592 if (intr->intrinsic != nir_intrinsic_load_deref)
593 continue;
594
595 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
596 if (deref->mode != nir_var_shader_out)
597 continue;
598
599 /* We only remap things that aren't builtins. */
600 nir_variable *out_var = nir_deref_instr_get_variable(deref);
601 if (out_var->data.location < VARYING_SLOT_VAR0)
602 continue;
603
604 unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
605 if (location >= MAX_VARYINGS_INCL_PATCH)
606 continue;
607
608 unsigned var_info_idx =
609 store_varying_info_idx[location][out_var->data.location_frac];
610 if (!var_info_idx) {
611 /* Something went wrong, the shader interfaces didn't match, so
612 * abandon packing. This can happen for example when the
613 * inputs are scalars but the outputs are struct members.
614 */
615 *varying_comp_info_size = 0;
616 break;
617 }
618
619 struct varying_component *vc_info =
620 &(*varying_comp_info)[var_info_idx-1];
621
622 if (!vc_info->initialised) {
623 const struct glsl_type *type = out_var->type;
624 if (nir_is_per_vertex_io(out_var, producer->info.stage)) {
625 assert(glsl_type_is_array(type));
626 type = glsl_get_array_element(type);
627 }
628
629 vc_info->var = out_var;
630 vc_info->interp_type =
631 get_interp_type(out_var, type, default_to_smooth_interp);
632 vc_info->interp_loc = get_interp_loc(out_var);
633 vc_info->is_32bit = glsl_type_is_32bit(type);
634 vc_info->is_patch = out_var->data.patch;
635 vc_info->is_intra_stage_only = true;
636 vc_info->initialised = true;
637 }
638 }
639 }
640 }
641
642 for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
643 struct varying_component *vc_info = &(*varying_comp_info)[i];
644 if (!vc_info->initialised) {
645 /* Something went wrong, the shader interfaces didn't match, so
646 * abandon packing. This can happen for example when the outputs are
647 * scalars but the inputs are struct members.
648 */
649 *varying_comp_info_size = 0;
650 break;
651 }
652 }
653 }
654
655 static void
656 assign_remap_locations(struct varying_loc (*remap)[4],
657 struct assigned_comps *assigned_comps,
658 struct varying_component *info,
659 unsigned *cursor, unsigned *comp,
660 unsigned max_location)
661 {
662 unsigned tmp_cursor = *cursor;
663 unsigned tmp_comp = *comp;
664
665 for (; tmp_cursor < max_location; tmp_cursor++) {
666
667 if (assigned_comps[tmp_cursor].comps) {
668 /* We can only pack varyings with matching interpolation types;
669 * the interpolation loc must match as well.
670 * TODO: i965 can handle interpolation locations that don't match,
671 * but the radeonsi nir backend handles everything as vec4s and so
672 * expects this to be the same for all components. We could make this
673 * check driver specific or drop it if NIR ever becomes the only
674 * radeonsi backend.
675 */
676 if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
677 assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
678 tmp_comp = 0;
679 continue;
680 }
681
682 /* We can only pack varyings with matching types, and the current
683 * algorithm only supports packing 32-bit.
684 */
685 if (!assigned_comps[tmp_cursor].is_32bit) {
686 tmp_comp = 0;
687 continue;
688 }
689
690 while (tmp_comp < 4 &&
691 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
692 tmp_comp++;
693 }
694 }
695
696 if (tmp_comp == 4) {
697 tmp_comp = 0;
698 continue;
699 }
700
701 unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
702
703 /* Once we have assigned a location mark it as used */
704 assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
705 assigned_comps[tmp_cursor].interp_type = info->interp_type;
706 assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
707 assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
708
709 /* Assign remap location */
710 remap[location][info->var->data.location_frac].component = tmp_comp++;
711 remap[location][info->var->data.location_frac].location =
712 tmp_cursor + VARYING_SLOT_VAR0;
713
714 break;
715 }
716
717 *cursor = tmp_cursor;
718 *comp = tmp_comp;
719 }
720
721 /* If there are empty components in the slot, compact the remaining components
722 * as close to component 0 as possible. This will make it easier to fill the
723 * empty components with components from a different slot in a following pass.
724 */
725 static void
726 compact_components(nir_shader *producer, nir_shader *consumer,
727 struct assigned_comps *assigned_comps,
728 bool default_to_smooth_interp)
729 {
730 struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
731 struct varying_component *varying_comp_info;
732 unsigned varying_comp_info_size;
733
734 /* Gather varying component info */
735 gather_varying_component_info(producer, consumer, &varying_comp_info,
736 &varying_comp_info_size,
737 default_to_smooth_interp);
738
739 /* Sort varying components. */
740 qsort(varying_comp_info, varying_comp_info_size,
741 sizeof(struct varying_component), cmp_varying_component);
742
743 unsigned cursor = 0;
744 unsigned comp = 0;
745
746 /* Set the remap array based on the sorted components */
747 for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
748 struct varying_component *info = &varying_comp_info[i];
749
750 assert(info->is_patch || cursor < MAX_VARYING);
751 if (info->is_patch) {
752 /* The list should be sorted with all non-patch inputs first followed
753 * by patch inputs. When we hit our first patch input, we need to
754 * reset the cursor to MAX_VARYING so we put them in the right slot.
755 */
756 if (cursor < MAX_VARYING) {
757 cursor = MAX_VARYING;
758 comp = 0;
759 }
760
761 assign_remap_locations(remap, assigned_comps, info,
762 &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
763 } else {
764 assign_remap_locations(remap, assigned_comps, info,
765 &cursor, &comp, MAX_VARYING);
766
767 /* Check if we failed to assign a remap location. This can happen if
768 * for example there are a bunch of unmovable components with
769 * mismatching interpolation types causing us to skip over locations
770 * that would have been useful for packing later components.
771 * The solution is to iterate over the locations again (this should
772 * happen very rarely in practice).
773 */
774 if (cursor == MAX_VARYING) {
775 cursor = 0;
776 comp = 0;
777 assign_remap_locations(remap, assigned_comps, info,
778 &cursor, &comp, MAX_VARYING);
779 }
780 }
781 }
782
783 ralloc_free(varying_comp_info);
784
785 uint64_t zero = 0;
786 uint32_t zero32 = 0;
787 remap_slots_and_components(consumer, nir_var_shader_in, remap,
788 &consumer->info.inputs_read, &zero,
789 &consumer->info.patch_inputs_read, &zero32);
790 remap_slots_and_components(producer, nir_var_shader_out, remap,
791 &producer->info.outputs_written,
792 &producer->info.outputs_read,
793 &producer->info.patch_outputs_written,
794 &producer->info.patch_outputs_read);
795 }
796
797 /* We assume that this has been called more-or-less directly after
798 * remove_unused_varyings. At this point, all of the varyings that we
799 * aren't going to be using have been completely removed and the
800 * inputs_read and outputs_written fields in nir_shader_info reflect
801 * this. Therefore, the total set of valid slots is the OR of the two
802 * sets of varyings; this accounts for varyings which one side may need
803 * to read/write even if the other doesn't. This can happen if, for
804 * instance, an array is used indirectly from one side causing it to be
805 * unsplittable but directly from the other.
806 */
807 void
808 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
809 bool default_to_smooth_interp)
810 {
811 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
812 assert(consumer->info.stage != MESA_SHADER_VERTEX);
813
814 struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
815
816 get_unmoveable_components_masks(producer, nir_var_shader_out,
817 assigned_comps,
818 producer->info.stage,
819 default_to_smooth_interp);
820 get_unmoveable_components_masks(consumer, nir_var_shader_in,
821 assigned_comps,
822 consumer->info.stage,
823 default_to_smooth_interp);
824
825 compact_components(producer, consumer, assigned_comps,
826 default_to_smooth_interp);
827 }
828
829 /*
830 * Mark XFB varyings as always_active_io in the consumer so the linking opts
831 * don't touch them.
832 */
833 void
834 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
835 {
836 nir_variable *input_vars[MAX_VARYING] = { 0 };
837
838 nir_foreach_shader_in_variable(var, consumer) {
839 if (var->data.location >= VARYING_SLOT_VAR0 &&
840 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
841
842 unsigned location = var->data.location - VARYING_SLOT_VAR0;
843 input_vars[location] = var;
844 }
845 }
846
847 nir_foreach_shader_out_variable(var, producer) {
848 if (var->data.location >= VARYING_SLOT_VAR0 &&
849 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
850
851 if (!var->data.always_active_io)
852 continue;
853
854 unsigned location = var->data.location - VARYING_SLOT_VAR0;
855 if (input_vars[location]) {
856 input_vars[location]->data.always_active_io = true;
857 }
858 }
859 }
860 }
861
862 static bool
863 does_varying_match(nir_variable *out_var, nir_variable *in_var)
864 {
865 return in_var->data.location == out_var->data.location &&
866 in_var->data.location_frac == out_var->data.location_frac;
867 }
868
869 static nir_variable *
870 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
871 {
872 nir_foreach_shader_in_variable(var, consumer) {
873 if (does_varying_match(out_var, var))
874 return var;
875 }
876
877 return NULL;
878 }
879
880 static bool
881 can_replace_varying(nir_variable *out_var)
882 {
883 /* Skip types that require more complex handling.
884 * TODO: add support for these types.
885 */
886 if (glsl_type_is_array(out_var->type) ||
887 glsl_type_is_dual_slot(out_var->type) ||
888 glsl_type_is_matrix(out_var->type) ||
889 glsl_type_is_struct_or_ifc(out_var->type))
890 return false;
891
892 /* Limit this pass to scalars for now to keep things simple. Most varyings
893 * should have been lowered to scalars at this point anyway.
894 */
895 if (!glsl_type_is_scalar(out_var->type))
896 return false;
897
898 if (out_var->data.location < VARYING_SLOT_VAR0 ||
899 out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
900 return false;
901
902 return true;
903 }
904
905 static bool
906 replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
907 {
908 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
909
910 nir_builder b;
911 nir_builder_init(&b, impl);
912
913 nir_variable *out_var =
914 nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
915
916 bool progress = false;
917 nir_foreach_block(block, impl) {
918 nir_foreach_instr(instr, block) {
919 if (instr->type != nir_instr_type_intrinsic)
920 continue;
921
922 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
923 if (intr->intrinsic != nir_intrinsic_load_deref)
924 continue;
925
926 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
927 if (in_deref->mode != nir_var_shader_in)
928 continue;
929
930 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
931
932 if (!does_varying_match(out_var, in_var))
933 continue;
934
935 b.cursor = nir_before_instr(instr);
936
937 nir_load_const_instr *out_const =
938 nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
939
940 /* Add new const to replace the input */
941 nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
942 intr->dest.ssa.bit_size,
943 out_const->value);
944
945 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));
946
947 progress = true;
948 }
949 }
950
951 return progress;
952 }
953
954 static bool
955 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
956 nir_intrinsic_instr *dup_store_intr)
957 {
958 assert(input_var);
959
960 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
961
962 nir_builder b;
963 nir_builder_init(&b, impl);
964
965 nir_variable *dup_out_var =
966 nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
967
968 bool progress = false;
969 nir_foreach_block(block, impl) {
970 nir_foreach_instr(instr, block) {
971 if (instr->type != nir_instr_type_intrinsic)
972 continue;
973
974 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
975 if (intr->intrinsic != nir_intrinsic_load_deref)
976 continue;
977
978 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
979 if (in_deref->mode != nir_var_shader_in)
980 continue;
981
982 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
983
984 if (!does_varying_match(dup_out_var, in_var) ||
985 in_var->data.interpolation != input_var->data.interpolation ||
986 get_interp_loc(in_var) != get_interp_loc(input_var))
987 continue;
988
989 b.cursor = nir_before_instr(instr);
990
991 nir_ssa_def *load = nir_load_var(&b, input_var);
992 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
993
994 progress = true;
995 }
996 }
997
998 return progress;
999 }
1000
1001 bool
1002 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1003 {
1004 /* TODO: Add support for more shader stage combinations */
1005 if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1006 (producer->info.stage != MESA_SHADER_VERTEX &&
1007 producer->info.stage != MESA_SHADER_TESS_EVAL))
1008 return false;
1009
1010 bool progress = false;
1011
1012 nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1013
1014 struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1015
1016 /* If we find a store in the last block of the producer we can be sure this
1017 * is the only possible value for this output.
1018 */
1019 nir_block *last_block = nir_impl_last_block(impl);
1020 nir_foreach_instr_reverse(instr, last_block) {
1021 if (instr->type != nir_instr_type_intrinsic)
1022 continue;
1023
1024 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1025
1026 if (intr->intrinsic != nir_intrinsic_store_deref)
1027 continue;
1028
1029 nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1030 if (out_deref->mode != nir_var_shader_out)
1031 continue;
1032
1033 nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1034 if (!can_replace_varying(out_var))
1035 continue;
1036
1037 if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
1038 progress |= replace_constant_input(consumer, intr);
1039 } else {
1040 struct hash_entry *entry =
1041 _mesa_hash_table_search(varying_values, intr->src[1].ssa);
1042 if (entry) {
1043 progress |= replace_duplicate_input(consumer,
1044 (nir_variable *) entry->data,
1045 intr);
1046 } else {
1047 nir_variable *in_var = get_matching_input_var(consumer, out_var);
1048 if (in_var) {
1049 _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
1050 in_var);
1051 }
1052 }
1053 }
1054 }
1055
1056 _mesa_hash_table_destroy(varying_values, NULL);
1057
1058 return progress;
1059 }
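
/* A sketch of what this catches: if the producer's last block ends with a
 * scalar store of a constant, e.g.
 *
 *    fade = 1.0;   // store_deref whose value comes from a load_const
 *
 * then every load of the matching input in the fragment shader is replaced
 * with that constant, and once dead-variable elimination has cleaned up the
 * now-unused input, a later nir_remove_unused_varyings() call can drop the
 * output as well. "fade" is just an illustrative varying name.
 */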
1060
1061 /* TODO any better helper somewhere to sort a list? */
1062
1063 static void
1064 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1065 {
1066 nir_foreach_variable_in_list(var, var_list) {
1067 if (var->data.location > new_var->data.location) {
1068 exec_node_insert_node_before(&var->node, &new_var->node);
1069 return;
1070 }
1071 }
1072 exec_list_push_tail(var_list, &new_var->node);
1073 }
1074
1075 static void
1076 sort_varyings(nir_shader *shader, nir_variable_mode mode,
1077 struct exec_list *sorted_list)
1078 {
1079 exec_list_make_empty(sorted_list);
1080 nir_foreach_variable_with_modes_safe(var, shader, mode) {
1081 exec_node_remove(&var->node);
1082 insert_sorted(sorted_list, var);
1083 }
1084 }
1085
1086 void
1087 nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1088 unsigned *size, gl_shader_stage stage)
1089 {
1090 unsigned location = 0;
1091 unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
1092 uint64_t processed_locs[2] = {0};
1093
1094 struct exec_list io_vars;
1095 sort_varyings(shader, mode, &io_vars);
1096
1097 int UNUSED last_loc = 0;
1098 bool last_partial = false;
1099 nir_foreach_variable_in_list(var, &io_vars) {
1100 const struct glsl_type *type = var->type;
1101 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
1102 assert(glsl_type_is_array(type));
1103 type = glsl_get_array_element(type);
1104 }
1105
1106 int base;
1107 if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1108 base = VERT_ATTRIB_GENERIC0;
1109 else if (var->data.mode == nir_var_shader_out &&
1110 stage == MESA_SHADER_FRAGMENT)
1111 base = FRAG_RESULT_DATA0;
1112 else
1113 base = VARYING_SLOT_VAR0;
1114
1115 unsigned var_size;
1116 if (var->data.compact) {
1117 /* If we are inside a partial compact,
1118 * don't allow another compact to be in this slot
1119 * if it starts at component 0.
1120 */
1121 if (last_partial && var->data.location_frac == 0) {
1122 location++;
1123 }
1124
1125 /* compact variables must be arrays of scalars */
1126 assert(glsl_type_is_array(type));
1127 assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1128 unsigned start = 4 * location + var->data.location_frac;
1129 unsigned end = start + glsl_get_length(type);
1130 var_size = end / 4 - location;
1131 last_partial = end % 4 != 0;
1132 } else {
1133 /* Compact variables bypass the normal varying compacting pass,
1134 * which means they cannot be in the same vec4 slot as a normal
1135 * variable. If part of the current slot is taken up by a compact
1136 * variable, we need to go to the next one.
1137 */
1138 if (last_partial) {
1139 location++;
1140 last_partial = false;
1141 }
1142 var_size = glsl_count_attribute_slots(type, false);
1143 }
1144
1145 /* Builtins don't allow component packing, so we only need to worry about
1146 * user-defined varyings sharing the same location.
1147 */
1148 bool processed = false;
1149 if (var->data.location >= base) {
1150 unsigned glsl_location = var->data.location - base;
1151
1152 for (unsigned i = 0; i < var_size; i++) {
1153 if (processed_locs[var->data.index] &
1154 ((uint64_t)1 << (glsl_location + i)))
1155 processed = true;
1156 else
1157 processed_locs[var->data.index] |=
1158 ((uint64_t)1 << (glsl_location + i));
1159 }
1160 }
1161
1162 /* Because component packing allows varyings to share the same location,
1163 * we may have already processed this location.
1164 */
1165 if (processed) {
1166 unsigned driver_location = assigned_locations[var->data.location];
1167 var->data.driver_location = driver_location;
1168
1169 /* An array may be packed such that it crosses multiple other arrays
1170 * or variables; we need to make sure we have allocated the elements
1171 * consecutively if the previously processed var was shorter than
1172 * the current array we are processing.
1173 *
1174 * NOTE: The code below assumes the var list is ordered in ascending
1175 * location order.
1176 */
1177 assert(last_loc <= var->data.location);
1178 last_loc = var->data.location;
1179 unsigned last_slot_location = driver_location + var_size;
1180 if (last_slot_location > location) {
1181 unsigned num_unallocated_slots = last_slot_location - location;
1182 unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1183 for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1184 assigned_locations[var->data.location + i] = location;
1185 location++;
1186 }
1187 }
1188 continue;
1189 }
1190
1191 for (unsigned i = 0; i < var_size; i++) {
1192 assigned_locations[var->data.location + i] = location + i;
1193 }
1194
1195 var->data.driver_location = location;
1196 location += var_size;
1197 }
1198
1199 if (last_partial)
1200 location++;
1201
1202 exec_list_append(&shader->variables, &io_vars);
1203 *size = location;
1204 }
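
/* Illustrative use (the shader pointer "fs" is assumed): assigning packed
 * driver locations for fragment shader inputs, with the number of vec4 slots
 * consumed returned through "size":
 *
 *    unsigned num_input_slots = 0;
 *    nir_assign_io_var_locations(fs, nir_var_shader_in, &num_input_slots,
 *                                MESA_SHADER_FRAGMENT);
 */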
1205
1206 static uint64_t
1207 get_linked_variable_location(unsigned location, bool patch)
1208 {
1209 if (!patch)
1210 return location;
1211
1212 /* Reserve locations 0...3 for special patch variables
1213 * like tess factors and bounding boxes, and the generic patch
1214 * variables will come after them.
1215 */
1216 if (location >= VARYING_SLOT_PATCH0)
1217 return location - VARYING_SLOT_PATCH0 + 4;
1218 else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
1219 location <= VARYING_SLOT_BOUNDING_BOX1)
1220 return location - VARYING_SLOT_TESS_LEVEL_OUTER;
1221 else
1222 unreachable("Unsupported variable in get_linked_variable_location.");
1223 }
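
/* Concretely, for patch variables this gives (illustrative values):
 * VARYING_SLOT_TESS_LEVEL_OUTER -> 0, VARYING_SLOT_TESS_LEVEL_INNER -> 1,
 * the bounding-box slots -> 2 and 3, and VARYING_SLOT_PATCHn -> n + 4, so
 * the special patch slots always occupy the first four linked locations.
 */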
1224
1225 static uint64_t
1226 get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
1227 {
1228 const struct glsl_type *type = variable->type;
1229
1230 if (nir_is_per_vertex_io(variable, stage)) {
1231 assert(glsl_type_is_array(type));
1232 type = glsl_get_array_element(type);
1233 }
1234
1235 unsigned slots = glsl_count_attribute_slots(type, false);
1236 if (variable->data.compact) {
1237 unsigned component_count = variable->data.location_frac + glsl_get_length(type);
1238 slots = DIV_ROUND_UP(component_count, 4);
1239 }
1240
1241 uint64_t mask = u_bit_consecutive64(0, slots);
1242 return mask;
1243 }
1244
1245 nir_linked_io_var_info
1246 nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
1247 {
1248 assert(producer);
1249 assert(consumer);
1250
1251 uint64_t producer_output_mask = 0;
1252 uint64_t producer_patch_output_mask = 0;
1253
1254 nir_foreach_shader_out_variable(variable, producer) {
1255 uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
1256 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1257
1258 if (variable->data.patch)
1259 producer_patch_output_mask |= mask << loc;
1260 else
1261 producer_output_mask |= mask << loc;
1262 }
1263
1264 uint64_t consumer_input_mask = 0;
1265 uint64_t consumer_patch_input_mask = 0;
1266
1267 nir_foreach_shader_in_variable(variable, consumer) {
1268 uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
1269 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1270
1271 if (variable->data.patch)
1272 consumer_patch_input_mask |= mask << loc;
1273 else
1274 consumer_input_mask |= mask << loc;
1275 }
1276
1277 uint64_t io_mask = producer_output_mask | consumer_input_mask;
1278 uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
1279
1280 nir_foreach_shader_out_variable(variable, producer) {
1281 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1282
1283 if (variable->data.patch)
1284 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4;
1285 else
1286 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4;
1287 }
1288
1289 nir_foreach_shader_in_variable(variable, consumer) {
1290 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1291
1292 if (variable->data.patch)
1293 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4;
1294 else
1295 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4;
1296 }
1297
1298 nir_linked_io_var_info result = {
1299 .num_linked_io_vars = util_bitcount64(io_mask),
1300 .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
1301 };
1302
1303 return result;
1304 }
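
/* Worked example (hedged): if io_mask has bits 3 and 7 set, the variable at
 * linked location 7 has one lower slot in use, so it gets
 *
 *    driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, 7)) * 4
 *                    = 1 * 4 = 4
 *
 * i.e. each linked vec4 slot advances the driver location by four 32-bit
 * components.
 */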