nir: Take a shader and variable mode in nir_assign_io_var_locations
[mesa.git] / src/compiler/nir/nir_linking_helpers.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28
29 /* This file contains various little helpers for doing simple linking in
30 * NIR. Eventually, we'll probably want a full-blown varying packing
31 * implementation in here. Right now, it removes unused I/O and does some simple packing.
32 */
33
34 /**
35 * Returns the bits in the inputs_read, outputs_written, or
36 * system_values_read bitfield corresponding to this variable.
37 */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41 if (var->data.location < 0)
42 return 0;
43
44 unsigned location = var->data.patch ?
45 var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46
47 assert(var->data.mode == nir_var_shader_in ||
48 var->data.mode == nir_var_shader_out ||
49 var->data.mode == nir_var_system_value);
50 assert(var->data.location >= 0);
51
52 const struct glsl_type *type = var->type;
53 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
54 assert(glsl_type_is_array(type));
55 type = glsl_get_array_element(type);
56 }
57
58 unsigned slots = glsl_count_attribute_slots(type, false);
59 return ((1ull << slots) - 1) << location;
60 }
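/* A short illustration of the mask math above (example varyings assumed, not
 * taken from any particular shader): a non-patch float at VARYING_SLOT_VAR1
 * occupies one slot, so only bit VARYING_SLOT_VAR1 is set; a float[3] at the
 * same location occupies three slots and sets bits VARYING_SLOT_VAR1 through
 * VARYING_SLOT_VAR1 + 2. Patch varyings are rebased onto VARYING_SLOT_PATCH0
 * first, so their bits start at 0 in the separate patch masks.
 */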
61
62 static uint8_t
63 get_num_components(nir_variable *var)
64 {
65 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
66 return 4;
67
68 return glsl_get_vector_elements(glsl_without_array(var->type));
69 }
70
71 static void
72 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
73 {
74 nir_foreach_function(function, shader) {
75 if (!function->impl)
76 continue;
77
78 nir_foreach_block(block, function->impl) {
79 nir_foreach_instr(instr, block) {
80 if (instr->type != nir_instr_type_intrinsic)
81 continue;
82
83 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
84 if (intrin->intrinsic != nir_intrinsic_load_deref)
85 continue;
86
87 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
88 if (deref->mode != nir_var_shader_out)
89 continue;
90
91 nir_variable *var = nir_deref_instr_get_variable(deref);
92 for (unsigned i = 0; i < get_num_components(var); i++) {
93 if (var->data.patch) {
94 patches_read[var->data.location_frac + i] |=
95 get_variable_io_mask(var, shader->info.stage);
96 } else {
97 read[var->data.location_frac + i] |=
98 get_variable_io_mask(var, shader->info.stage);
99 }
100 }
101 }
102 }
103 }
104 }
105
106 /**
107 * Helper for removing unused shader I/O variables by demoting them to global
108 * variables (which may then be dead-code eliminated).
109 *
110 * Example usage is:
111 *
112 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
113 * read, patches_read) ||
114 * progress;
115 *
116 * The "used" parameter should be an array of 4 uint64_t bitmasks (of
117 * VARYING_BIT_* values), one per .location_frac. Note that for vector
118 * variables, only the first channel (.location_frac) is examined when
119 * deciding whether the variable is used!
120 */
121 bool
122 nir_remove_unused_io_vars(nir_shader *shader,
123 nir_variable_mode mode,
124 uint64_t *used_by_other_stage,
125 uint64_t *used_by_other_stage_patches)
126 {
127 bool progress = false;
128 uint64_t *used;
129
130 assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
131 struct exec_list *var_list = nir_variable_list_for_mode(shader, mode);
132
133 nir_foreach_variable_safe(var, var_list) {
134 if (var->data.patch)
135 used = used_by_other_stage_patches;
136 else
137 used = used_by_other_stage;
138
139 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
140 continue;
141
142 if (var->data.always_active_io)
143 continue;
144
145 if (var->data.explicit_xfb_buffer)
146 continue;
147
148 uint64_t other_stage = used[var->data.location_frac];
149
150 if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
151          /* Not used by the other stage, so demote it to a global variable */
152 var->data.location = 0;
153 var->data.mode = nir_var_shader_temp;
154
155 exec_node_remove(&var->node);
156 exec_list_push_tail(&shader->globals, &var->node);
157
158 progress = true;
159 }
160 }
161
162 if (progress)
163 nir_fixup_deref_modes(shader);
164
165 return progress;
166 }
167
168 bool
169 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
170 {
171 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
172 assert(consumer->info.stage != MESA_SHADER_VERTEX);
173
174 uint64_t read[4] = { 0 }, written[4] = { 0 };
175 uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
176
177 nir_foreach_shader_out_variable(var, producer) {
178 for (unsigned i = 0; i < get_num_components(var); i++) {
179 if (var->data.patch) {
180 patches_written[var->data.location_frac + i] |=
181 get_variable_io_mask(var, producer->info.stage);
182 } else {
183 written[var->data.location_frac + i] |=
184 get_variable_io_mask(var, producer->info.stage);
185 }
186 }
187 }
188
189 nir_foreach_shader_in_variable(var, consumer) {
190 for (unsigned i = 0; i < get_num_components(var); i++) {
191 if (var->data.patch) {
192 patches_read[var->data.location_frac + i] |=
193 get_variable_io_mask(var, consumer->info.stage);
194 } else {
195 read[var->data.location_frac + i] |=
196 get_variable_io_mask(var, consumer->info.stage);
197 }
198 }
199 }
200
201 /* Each TCS invocation can read data written by other TCS invocations,
202 * so even if the outputs are not used by the TES we must also make
203 * sure they are not read by the TCS before demoting them to globals.
204 */
205 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
206 tcs_add_output_reads(producer, read, patches_read);
207
208 bool progress = false;
209 progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
210 patches_read);
211
212 progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
213 patches_written) || progress;
214
215 return progress;
216 }
217
218 static uint8_t
219 get_interp_type(nir_variable *var, const struct glsl_type *type,
220 bool default_to_smooth_interp)
221 {
222 if (glsl_type_is_integer(type))
223 return INTERP_MODE_FLAT;
224 else if (var->data.interpolation != INTERP_MODE_NONE)
225 return var->data.interpolation;
226 else if (default_to_smooth_interp)
227 return INTERP_MODE_SMOOTH;
228 else
229 return INTERP_MODE_NONE;
230 }
231
232 #define INTERPOLATE_LOC_SAMPLE 0
233 #define INTERPOLATE_LOC_CENTROID 1
234 #define INTERPOLATE_LOC_CENTER 2
235
236 static uint8_t
237 get_interp_loc(nir_variable *var)
238 {
239 if (var->data.sample)
240 return INTERPOLATE_LOC_SAMPLE;
241 else if (var->data.centroid)
242 return INTERPOLATE_LOC_CENTROID;
243 else
244 return INTERPOLATE_LOC_CENTER;
245 }
246
247 static bool
248 is_packing_supported_for_type(const struct glsl_type *type)
249 {
250 /* We ignore complex types such as arrays, matrices, structs and bit sizes
251 * other than 32-bit. All other vector types should have been split into
252 * scalar variables by the lower_io_to_scalar pass. The only exception
253 * should be OpenGL xfb varyings.
254 * TODO: add support for more complex types?
255 */
256 return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
257 }
258
259 struct assigned_comps
260 {
261 uint8_t comps;
262 uint8_t interp_type;
263 uint8_t interp_loc;
264 bool is_32bit;
265 };
266
267 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
268 * algorithms this function just assigns them their existing locations for now.
269 * TODO: allow better packing of complex types.
270 */
271 static void
272 get_unmoveable_components_masks(struct exec_list *var_list,
273 struct assigned_comps *comps,
274 gl_shader_stage stage,
275 bool default_to_smooth_interp)
276 {
277 nir_foreach_variable_safe(var, var_list) {
278 assert(var->data.location >= 0);
279
280 /* Only remap things that aren't built-ins. */
281 if (var->data.location >= VARYING_SLOT_VAR0 &&
282 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
283
284 const struct glsl_type *type = var->type;
285 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
286 assert(glsl_type_is_array(type));
287 type = glsl_get_array_element(type);
288 }
289
290 /* If we can pack this varying then don't mark the components as
291 * used.
292 */
293 if (is_packing_supported_for_type(type))
294 continue;
295
296 unsigned location = var->data.location - VARYING_SLOT_VAR0;
297
298 unsigned elements =
299 glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
300 glsl_get_vector_elements(glsl_without_array(type)) : 4;
301
302 bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
303 unsigned slots = glsl_count_attribute_slots(type, false);
304 unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
305 unsigned comps_slot2 = 0;
306 for (unsigned i = 0; i < slots; i++) {
307 if (dual_slot) {
308 if (i & 1) {
309 comps[location + i].comps |= ((1 << comps_slot2) - 1);
310 } else {
311 unsigned num_comps = 4 - var->data.location_frac;
312 comps_slot2 = (elements * dmul) - num_comps;
313
314 /* Assume ARB_enhanced_layouts packing rules for doubles */
315 assert(var->data.location_frac == 0 ||
316 var->data.location_frac == 2);
317 assert(comps_slot2 <= 4);
318
319 comps[location + i].comps |=
320 ((1 << num_comps) - 1) << var->data.location_frac;
321 }
322 } else {
323 comps[location + i].comps |=
324 ((1 << (elements * dmul)) - 1) << var->data.location_frac;
325 }
326
327 comps[location + i].interp_type =
328 get_interp_type(var, type, default_to_smooth_interp);
329 comps[location + i].interp_loc = get_interp_loc(var);
330 comps[location + i].is_32bit =
331 glsl_type_is_32bit(glsl_without_array(type));
332 }
333 }
334 }
335 }
336
337 struct varying_loc
338 {
339 uint8_t component;
340 uint32_t location;
341 };
342
343 static void
344 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
345 uint64_t slots_used_mask, unsigned num_slots)
346 {
347 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
348
349 slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
350 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
351 }
352
353 static void
354 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
355 {
356 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
357
358 slots_used[var->data.patch ? 1 : 0] |=
359 BITFIELD64_BIT(var->data.location - loc_offset + offset);
360 }
361
362 static void
363 remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
364 struct varying_loc (*remap)[4],
365 uint64_t *slots_used, uint64_t *out_slots_read,
366 uint32_t *p_slots_used, uint32_t *p_out_slots_read)
367 {
368 uint64_t out_slots_read_tmp[2] = {0};
369 uint64_t slots_used_tmp[2] = {0};
370
371 /* We don't touch builtins so just copy the bitmask */
372 slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
373
374 nir_foreach_variable(var, var_list) {
375 assert(var->data.location >= 0);
376
377 /* Only remap things that aren't built-ins */
378 if (var->data.location >= VARYING_SLOT_VAR0 &&
379 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
380
381 const struct glsl_type *type = var->type;
382 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
383 assert(glsl_type_is_array(type));
384 type = glsl_get_array_element(type);
385 }
386
387 unsigned num_slots = glsl_count_attribute_slots(type, false);
388 bool used_across_stages = false;
389 bool outputs_read = false;
390
391 unsigned location = var->data.location - VARYING_SLOT_VAR0;
392 struct varying_loc *new_loc = &remap[location][var->data.location_frac];
393
394 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
395 uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
396 uint64_t outs_used =
397 var->data.patch ? *p_out_slots_read : *out_slots_read;
398 uint64_t slots =
399 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
400
401 if (slots & used)
402 used_across_stages = true;
403
404 if (slots & outs_used)
405 outputs_read = true;
406
407 if (new_loc->location) {
408 var->data.location = new_loc->location;
409 var->data.location_frac = new_loc->component;
410 }
411
412 if (var->data.always_active_io) {
413          /* We can't apply link-time optimisations (specifically array
414           * splitting) to these, so we need to copy the existing mask;
415           * otherwise we will mess up the mask for things like partially
416 * marked arrays.
417 */
418 if (used_across_stages)
419 mark_all_used_slots(var, slots_used_tmp, used, num_slots);
420
421 if (outputs_read) {
422 mark_all_used_slots(var, out_slots_read_tmp, outs_used,
423 num_slots);
424 }
425 } else {
426 for (unsigned i = 0; i < num_slots; i++) {
427 if (used_across_stages)
428 mark_used_slot(var, slots_used_tmp, i);
429
430 if (outputs_read)
431 mark_used_slot(var, out_slots_read_tmp, i);
432 }
433 }
434 }
435 }
436
437 *slots_used = slots_used_tmp[0];
438 *out_slots_read = out_slots_read_tmp[0];
439 *p_slots_used = slots_used_tmp[1];
440 *p_out_slots_read = out_slots_read_tmp[1];
441 }
442
443 struct varying_component {
444 nir_variable *var;
445 uint8_t interp_type;
446 uint8_t interp_loc;
447 bool is_32bit;
448 bool is_patch;
449 bool is_intra_stage_only;
450 bool initialised;
451 };
452
453 static int
454 cmp_varying_component(const void *comp1_v, const void *comp2_v)
455 {
456 struct varying_component *comp1 = (struct varying_component *) comp1_v;
457 struct varying_component *comp2 = (struct varying_component *) comp2_v;
458
459    /* We want patches to be ordered at the end of the array */
460 if (comp1->is_patch != comp2->is_patch)
461 return comp1->is_patch ? 1 : -1;
462
463 /* We want to try to group together TCS outputs that are only read by other
464    * TCS invocations and not consumed by the following stage.
465 */
466 if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
467 return comp1->is_intra_stage_only ? 1 : -1;
468
469 /* We can only pack varyings with matching interpolation types so group
470 * them together.
471 */
472 if (comp1->interp_type != comp2->interp_type)
473 return comp1->interp_type - comp2->interp_type;
474
475 /* Interpolation loc must match also. */
476 if (comp1->interp_loc != comp2->interp_loc)
477 return comp1->interp_loc - comp2->interp_loc;
478
479 /* If everything else matches just use the original location to sort */
480 return comp1->var->data.location - comp2->var->data.location;
481 }
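/* A sketch of the resulting qsort() order (hypothetical component mix): the
 * per-vertex components consumed by the next stage come first, grouped by
 * interpolation type and then interpolation location (e.g. all FLAT/CENTER
 * components, then SMOOTH/CENTROID, ...), TCS-internal-only components follow
 * those, and all patch components sit at the very end, so each group can be
 * packed into contiguous slots by assign_remap_locations().
 */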
482
483 static void
484 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
485 struct varying_component **varying_comp_info,
486 unsigned *varying_comp_info_size,
487 bool default_to_smooth_interp)
488 {
489 unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
490 unsigned num_of_comps_to_pack = 0;
491
492    /* Count the number of varyings that can be packed and create a mapping
493 * of those varyings to the array we will pass to qsort.
494 */
495 nir_foreach_shader_out_variable(var, producer) {
496
497 /* Only remap things that aren't builtins. */
498 if (var->data.location >= VARYING_SLOT_VAR0 &&
499 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
500
501 /* We can't repack xfb varyings. */
502 if (var->data.always_active_io)
503 continue;
504
505 const struct glsl_type *type = var->type;
506 if (nir_is_per_vertex_io(var, producer->info.stage) || var->data.per_view) {
507 assert(glsl_type_is_array(type));
508 type = glsl_get_array_element(type);
509 }
510
511 if (!is_packing_supported_for_type(type))
512 continue;
513
514 unsigned loc = var->data.location - VARYING_SLOT_VAR0;
515 store_varying_info_idx[loc][var->data.location_frac] =
516 ++num_of_comps_to_pack;
517 }
518 }
519
520 *varying_comp_info_size = num_of_comps_to_pack;
521 *varying_comp_info = rzalloc_array(NULL, struct varying_component,
522 num_of_comps_to_pack);
523
524 nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
525
526 /* Walk over the shader and populate the varying component info array */
527 nir_foreach_block(block, impl) {
528 nir_foreach_instr(instr, block) {
529 if (instr->type != nir_instr_type_intrinsic)
530 continue;
531
532 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
533 if (intr->intrinsic != nir_intrinsic_load_deref &&
534 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
535 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
536 intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
537 intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
538 continue;
539
540 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
541 if (deref->mode != nir_var_shader_in)
542 continue;
543
544 /* We only remap things that aren't builtins. */
545 nir_variable *in_var = nir_deref_instr_get_variable(deref);
546 if (in_var->data.location < VARYING_SLOT_VAR0)
547 continue;
548
549 unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
550 if (location >= MAX_VARYINGS_INCL_PATCH)
551 continue;
552
553 unsigned var_info_idx =
554 store_varying_info_idx[location][in_var->data.location_frac];
555 if (!var_info_idx)
556 continue;
557
558 struct varying_component *vc_info =
559 &(*varying_comp_info)[var_info_idx-1];
560
561 if (!vc_info->initialised) {
562 const struct glsl_type *type = in_var->type;
563 if (nir_is_per_vertex_io(in_var, consumer->info.stage) ||
564 in_var->data.per_view) {
565 assert(glsl_type_is_array(type));
566 type = glsl_get_array_element(type);
567 }
568
569 vc_info->var = in_var;
570 vc_info->interp_type =
571 get_interp_type(in_var, type, default_to_smooth_interp);
572 vc_info->interp_loc = get_interp_loc(in_var);
573 vc_info->is_32bit = glsl_type_is_32bit(type);
574 vc_info->is_patch = in_var->data.patch;
575 vc_info->is_intra_stage_only = false;
576 vc_info->initialised = true;
577 }
578 }
579 }
580
581 /* Walk over the shader and populate the varying component info array
582    * for varyings which are read by other TCS invocations but are not consumed
583 * by the TES.
584 */
585 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
586 impl = nir_shader_get_entrypoint(producer);
587
588 nir_foreach_block(block, impl) {
589 nir_foreach_instr(instr, block) {
590 if (instr->type != nir_instr_type_intrinsic)
591 continue;
592
593 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
594 if (intr->intrinsic != nir_intrinsic_load_deref)
595 continue;
596
597 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
598 if (deref->mode != nir_var_shader_out)
599 continue;
600
601 /* We only remap things that aren't builtins. */
602 nir_variable *out_var = nir_deref_instr_get_variable(deref);
603 if (out_var->data.location < VARYING_SLOT_VAR0)
604 continue;
605
606 unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
607 if (location >= MAX_VARYINGS_INCL_PATCH)
608 continue;
609
610 unsigned var_info_idx =
611 store_varying_info_idx[location][out_var->data.location_frac];
612 if (!var_info_idx) {
613 /* Something went wrong, the shader interfaces didn't match, so
614 * abandon packing. This can happen for example when the
615 * inputs are scalars but the outputs are struct members.
616 */
617 *varying_comp_info_size = 0;
618 break;
619 }
620
621 struct varying_component *vc_info =
622 &(*varying_comp_info)[var_info_idx-1];
623
624 if (!vc_info->initialised) {
625 const struct glsl_type *type = out_var->type;
626 if (nir_is_per_vertex_io(out_var, producer->info.stage)) {
627 assert(glsl_type_is_array(type));
628 type = glsl_get_array_element(type);
629 }
630
631 vc_info->var = out_var;
632 vc_info->interp_type =
633 get_interp_type(out_var, type, default_to_smooth_interp);
634 vc_info->interp_loc = get_interp_loc(out_var);
635 vc_info->is_32bit = glsl_type_is_32bit(type);
636 vc_info->is_patch = out_var->data.patch;
637 vc_info->is_intra_stage_only = true;
638 vc_info->initialised = true;
639 }
640 }
641 }
642 }
643
644 for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
645 struct varying_component *vc_info = &(*varying_comp_info)[i];
646 if (!vc_info->initialised) {
647 /* Something went wrong, the shader interfaces didn't match, so
648 * abandon packing. This can happen for example when the outputs are
649 * scalars but the inputs are struct members.
650 */
651 *varying_comp_info_size = 0;
652 break;
653 }
654 }
655 }
656
657 static void
658 assign_remap_locations(struct varying_loc (*remap)[4],
659 struct assigned_comps *assigned_comps,
660 struct varying_component *info,
661 unsigned *cursor, unsigned *comp,
662 unsigned max_location)
663 {
664 unsigned tmp_cursor = *cursor;
665 unsigned tmp_comp = *comp;
666
667 for (; tmp_cursor < max_location; tmp_cursor++) {
668
669 if (assigned_comps[tmp_cursor].comps) {
670 /* We can only pack varyings with matching interpolation types,
671 * interpolation loc must match also.
672 * TODO: i965 can handle interpolation locations that don't match,
673 * but the radeonsi nir backend handles everything as vec4s and so
674 * expects this to be the same for all components. We could make this
675        * check driver specific or drop it if NIR ever becomes the only
676 * radeonsi backend.
677 */
678 if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
679 assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
680 tmp_comp = 0;
681 continue;
682 }
683
684 /* We can only pack varyings with matching types, and the current
685 * algorithm only supports packing 32-bit.
686 */
687 if (!assigned_comps[tmp_cursor].is_32bit) {
688 tmp_comp = 0;
689 continue;
690 }
691
692 while (tmp_comp < 4 &&
693 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
694 tmp_comp++;
695 }
696 }
697
698 if (tmp_comp == 4) {
699 tmp_comp = 0;
700 continue;
701 }
702
703 unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
704
705 /* Once we have assigned a location mark it as used */
706 assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
707 assigned_comps[tmp_cursor].interp_type = info->interp_type;
708 assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
709 assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
710
711 /* Assign remap location */
712 remap[location][info->var->data.location_frac].component = tmp_comp++;
713 remap[location][info->var->data.location_frac].location =
714 tmp_cursor + VARYING_SLOT_VAR0;
715
716 break;
717 }
718
719 *cursor = tmp_cursor;
720 *comp = tmp_comp;
721 }
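/* Worked example for the loop above (values assumed, search starting at
 * component 0): if assigned_comps[tmp_cursor].comps == 0x3 (x and y taken)
 * and the interpolation type/loc and 32-bitness match, tmp_comp settles on 2,
 * so the varying is remapped to component 2 of slot
 * tmp_cursor + VARYING_SLOT_VAR0 and bit 2 is added to the assigned mask. If
 * all four components were already taken, tmp_comp would reach 4 and the loop
 * would move on to the next slot.
 */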
722
723 /* If there are empty components in the slot, compact the remaining components
724 * as close to component 0 as possible. This will make it easier to fill the
725 * empty components with components from a different slot in a following pass.
726 */
727 static void
728 compact_components(nir_shader *producer, nir_shader *consumer,
729 struct assigned_comps *assigned_comps,
730 bool default_to_smooth_interp)
731 {
732 struct exec_list *input_list = &consumer->inputs;
733 struct exec_list *output_list = &producer->outputs;
734 struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
735 struct varying_component *varying_comp_info;
736 unsigned varying_comp_info_size;
737
738 /* Gather varying component info */
739 gather_varying_component_info(producer, consumer, &varying_comp_info,
740 &varying_comp_info_size,
741 default_to_smooth_interp);
742
743 /* Sort varying components. */
744 qsort(varying_comp_info, varying_comp_info_size,
745 sizeof(struct varying_component), cmp_varying_component);
746
747 unsigned cursor = 0;
748 unsigned comp = 0;
749
750 /* Set the remap array based on the sorted components */
751 for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
752 struct varying_component *info = &varying_comp_info[i];
753
754 assert(info->is_patch || cursor < MAX_VARYING);
755 if (info->is_patch) {
756 /* The list should be sorted with all non-patch inputs first followed
757 * by patch inputs. When we hit our first patch input, we need to
758 * reset the cursor to MAX_VARYING so we put them in the right slot.
759 */
760 if (cursor < MAX_VARYING) {
761 cursor = MAX_VARYING;
762 comp = 0;
763 }
764
765 assign_remap_locations(remap, assigned_comps, info,
766 &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
767 } else {
768 assign_remap_locations(remap, assigned_comps, info,
769 &cursor, &comp, MAX_VARYING);
770
771          /* Check if we failed to assign a remap location. This can happen if,
772           * for example, there are a bunch of unmovable components with
773 * mismatching interpolation types causing us to skip over locations
774 * that would have been useful for packing later components.
775 * The solution is to iterate over the locations again (this should
776 * happen very rarely in practice).
777 */
778 if (cursor == MAX_VARYING) {
779 cursor = 0;
780 comp = 0;
781 assign_remap_locations(remap, assigned_comps, info,
782 &cursor, &comp, MAX_VARYING);
783 }
784 }
785 }
786
787 ralloc_free(varying_comp_info);
788
789 uint64_t zero = 0;
790 uint32_t zero32 = 0;
791 remap_slots_and_components(input_list, consumer->info.stage, remap,
792 &consumer->info.inputs_read, &zero,
793 &consumer->info.patch_inputs_read, &zero32);
794 remap_slots_and_components(output_list, producer->info.stage, remap,
795 &producer->info.outputs_written,
796 &producer->info.outputs_read,
797 &producer->info.patch_outputs_written,
798 &producer->info.patch_outputs_read);
799 }
800
801 /* We assume that this has been called more-or-less directly after
802 * remove_unused_varyings. At this point, all of the varyings that we
803 * aren't going to be using have been completely removed and the
804 * inputs_read and outputs_written fields in nir_shader_info reflect
805 * this. Therefore, the total set of valid slots is the OR of the two
806 * sets of varyings; this accounts for varyings which one side may need
807 * to read/write even if the other doesn't. This can happen if, for
808 * instance, an array is used indirectly from one side causing it to be
809 * unsplittable but directly from the other.
810 */
811 void
812 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
813 bool default_to_smooth_interp)
814 {
815 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
816 assert(consumer->info.stage != MESA_SHADER_VERTEX);
817
818 struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
819
820 get_unmoveable_components_masks(&producer->outputs, assigned_comps,
821 producer->info.stage,
822 default_to_smooth_interp);
823 get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
824 consumer->info.stage,
825 default_to_smooth_interp);
826
827 compact_components(producer, consumer, assigned_comps,
828 default_to_smooth_interp);
829 }
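/* A minimal driver-side sketch of how the passes above are typically chained
 * (the helper name and the DCE calls are illustrative, not mandated by this
 * file; varyings are assumed to have been split to scalars beforehand):
 *
 *    static void
 *    link_stage_pair(nir_shader *producer, nir_shader *consumer)
 *    {
 *       if (nir_remove_unused_varyings(producer, consumer)) {
 *          // Unused I/O was demoted to temporaries; let DCE delete it.
 *          nir_opt_dce(producer);
 *          nir_opt_dce(consumer);
 *       }
 *       nir_compact_varyings(producer, consumer,
 *                            true); // default_to_smooth_interp
 *    }
 */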
830
831 /*
832 * Mark XFB varyings as always_active_io in the consumer so the linking opts
833 * don't touch them.
834 */
835 void
836 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
837 {
838 nir_variable *input_vars[MAX_VARYING] = { 0 };
839
840 nir_foreach_shader_in_variable(var, consumer) {
841 if (var->data.location >= VARYING_SLOT_VAR0 &&
842 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
843
844 unsigned location = var->data.location - VARYING_SLOT_VAR0;
845 input_vars[location] = var;
846 }
847 }
848
849 nir_foreach_shader_out_variable(var, producer) {
850 if (var->data.location >= VARYING_SLOT_VAR0 &&
851 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
852
853 if (!var->data.always_active_io)
854 continue;
855
856 unsigned location = var->data.location - VARYING_SLOT_VAR0;
857 if (input_vars[location]) {
858 input_vars[location]->data.always_active_io = true;
859 }
860 }
861 }
862 }
863
864 static bool
865 does_varying_match(nir_variable *out_var, nir_variable *in_var)
866 {
867 return in_var->data.location == out_var->data.location &&
868 in_var->data.location_frac == out_var->data.location_frac;
869 }
870
871 static nir_variable *
872 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
873 {
874 nir_foreach_shader_in_variable(var, consumer) {
875 if (does_varying_match(out_var, var))
876 return var;
877 }
878
879 return NULL;
880 }
881
882 static bool
883 can_replace_varying(nir_variable *out_var)
884 {
885 /* Skip types that require more complex handling.
886 * TODO: add support for these types.
887 */
888 if (glsl_type_is_array(out_var->type) ||
889 glsl_type_is_dual_slot(out_var->type) ||
890 glsl_type_is_matrix(out_var->type) ||
891 glsl_type_is_struct_or_ifc(out_var->type))
892 return false;
893
894 /* Limit this pass to scalars for now to keep things simple. Most varyings
895 * should have been lowered to scalars at this point anyway.
896 */
897 if (!glsl_type_is_scalar(out_var->type))
898 return false;
899
900 if (out_var->data.location < VARYING_SLOT_VAR0 ||
901 out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
902 return false;
903
904 return true;
905 }
906
907 static bool
908 replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
909 {
910 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
911
912 nir_builder b;
913 nir_builder_init(&b, impl);
914
915 nir_variable *out_var =
916 nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
917
918 bool progress = false;
919 nir_foreach_block(block, impl) {
920 nir_foreach_instr(instr, block) {
921 if (instr->type != nir_instr_type_intrinsic)
922 continue;
923
924 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
925 if (intr->intrinsic != nir_intrinsic_load_deref)
926 continue;
927
928 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
929 if (in_deref->mode != nir_var_shader_in)
930 continue;
931
932 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
933
934 if (!does_varying_match(out_var, in_var))
935 continue;
936
937 b.cursor = nir_before_instr(instr);
938
939 nir_load_const_instr *out_const =
940 nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
941
942 /* Add new const to replace the input */
943 nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
944 intr->dest.ssa.bit_size,
945 out_const->value);
946
947 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));
948
949 progress = true;
950 }
951 }
952
953 return progress;
954 }
955
956 static bool
957 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
958 nir_intrinsic_instr *dup_store_intr)
959 {
960 assert(input_var);
961
962 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
963
964 nir_builder b;
965 nir_builder_init(&b, impl);
966
967 nir_variable *dup_out_var =
968 nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
969
970 bool progress = false;
971 nir_foreach_block(block, impl) {
972 nir_foreach_instr(instr, block) {
973 if (instr->type != nir_instr_type_intrinsic)
974 continue;
975
976 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
977 if (intr->intrinsic != nir_intrinsic_load_deref)
978 continue;
979
980 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
981 if (in_deref->mode != nir_var_shader_in)
982 continue;
983
984 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
985
986 if (!does_varying_match(dup_out_var, in_var) ||
987 in_var->data.interpolation != input_var->data.interpolation ||
988 get_interp_loc(in_var) != get_interp_loc(input_var))
989 continue;
990
991 b.cursor = nir_before_instr(instr);
992
993 nir_ssa_def *load = nir_load_var(&b, input_var);
994 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
995
996 progress = true;
997 }
998 }
999
1000 return progress;
1001 }
1002
1003 bool
1004 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1005 {
1006 /* TODO: Add support for more shader stage combinations */
1007 if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1008 (producer->info.stage != MESA_SHADER_VERTEX &&
1009 producer->info.stage != MESA_SHADER_TESS_EVAL))
1010 return false;
1011
1012 bool progress = false;
1013
1014 nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1015
1016 struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1017
1018 /* If we find a store in the last block of the producer we can be sure this
1019 * is the only possible value for this output.
1020 */
1021 nir_block *last_block = nir_impl_last_block(impl);
1022 nir_foreach_instr_reverse(instr, last_block) {
1023 if (instr->type != nir_instr_type_intrinsic)
1024 continue;
1025
1026 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1027
1028 if (intr->intrinsic != nir_intrinsic_store_deref)
1029 continue;
1030
1031 nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1032 if (out_deref->mode != nir_var_shader_out)
1033 continue;
1034
1035 nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1036 if (!can_replace_varying(out_var))
1037 continue;
1038
1039 if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
1040 progress |= replace_constant_input(consumer, intr);
1041 } else {
1042 struct hash_entry *entry =
1043 _mesa_hash_table_search(varying_values, intr->src[1].ssa);
1044 if (entry) {
1045 progress |= replace_duplicate_input(consumer,
1046 (nir_variable *) entry->data,
1047 intr);
1048 } else {
1049 nir_variable *in_var = get_matching_input_var(consumer, out_var);
1050 if (in_var) {
1051 _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
1052 in_var);
1053 }
1054 }
1055 }
1056 }
1057
1058 _mesa_hash_table_destroy(varying_values, NULL);
1059
1060 return progress;
1061 }
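/* Example of what the pass above catches (GLSL-level, for illustration only):
 * if the producer's last block stores a constant into a scalar output, e.g.
 * "factor = 1.0;", every load of the matching input in the fragment shader is
 * replaced by that constant; and if two scalar outputs are stored from the
 * same SSA value, loads of the duplicate input are rewritten to load the first
 * matching input variable instead (provided the interpolation qualifiers and
 * locations match).
 */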
1062
1063 /* TODO any better helper somewhere to sort a list? */
1064
1065 static void
1066 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1067 {
1068 nir_foreach_variable(var, var_list) {
1069 if (var->data.location > new_var->data.location) {
1070 exec_node_insert_node_before(&var->node, &new_var->node);
1071 return;
1072 }
1073 }
1074 exec_list_push_tail(var_list, &new_var->node);
1075 }
1076
1077 static void
1078 sort_varyings(nir_shader *shader, nir_variable_mode mode,
1079 struct exec_list *sorted_list)
1080 {
1081 exec_list_make_empty(sorted_list);
1082 nir_foreach_variable_with_modes_safe(var, shader, mode) {
1083 exec_node_remove(&var->node);
1084 insert_sorted(sorted_list, var);
1085 }
1086 }
1087
1088 void
1089 nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1090 unsigned *size, gl_shader_stage stage)
1091 {
1092 unsigned location = 0;
1093 unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
1094 uint64_t processed_locs[2] = {0};
1095
1096 struct exec_list io_vars;
1097 sort_varyings(shader, mode, &io_vars);
1098
1099 int UNUSED last_loc = 0;
1100 bool last_partial = false;
1101 nir_foreach_variable(var, &io_vars) {
1102 const struct glsl_type *type = var->type;
1103 if (nir_is_per_vertex_io(var, stage) || var->data.per_view) {
1104 assert(glsl_type_is_array(type));
1105 type = glsl_get_array_element(type);
1106 }
1107
1108 int base;
1109 if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1110 base = VERT_ATTRIB_GENERIC0;
1111 else if (var->data.mode == nir_var_shader_out &&
1112 stage == MESA_SHADER_FRAGMENT)
1113 base = FRAG_RESULT_DATA0;
1114 else
1115 base = VARYING_SLOT_VAR0;
1116
1117 unsigned var_size;
1118 if (var->data.compact) {
1119 /* If we are inside a partial compact,
1120 * don't allow another compact to be in this slot
1121 * if it starts at component 0.
1122 */
1123 if (last_partial && var->data.location_frac == 0) {
1124 location++;
1125 }
1126
1127 /* compact variables must be arrays of scalars */
1128 assert(glsl_type_is_array(type));
1129 assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1130 unsigned start = 4 * location + var->data.location_frac;
1131 unsigned end = start + glsl_get_length(type);
1132 var_size = end / 4 - location;
1133 last_partial = end % 4 != 0;
1134 } else {
1135 /* Compact variables bypass the normal varying compacting pass,
1136 * which means they cannot be in the same vec4 slot as a normal
1137 * variable. If part of the current slot is taken up by a compact
1138 * variable, we need to go to the next one.
1139 */
1140 if (last_partial) {
1141 location++;
1142 last_partial = false;
1143 }
1144 var_size = glsl_count_attribute_slots(type, false);
1145 }
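      /* Worked example of the compact case above (numbers assumed): a compact
       * float[5] starting at location_frac 0 while the running location is 2
       * gives start = 8 and end = 13, so var_size = 13 / 4 - 2 = 1 and
       * last_partial is set; the trailing element spills into the next slot,
       * which is accounted for either by the next variable or by the final
       * "if (last_partial) location++;" below.
       */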
1146
1147       /* Builtins don't allow component packing, so we only need to worry about
1148        * user-defined varyings sharing the same location.
1149 */
1150 bool processed = false;
1151 if (var->data.location >= base) {
1152 unsigned glsl_location = var->data.location - base;
1153
1154 for (unsigned i = 0; i < var_size; i++) {
1155 if (processed_locs[var->data.index] &
1156 ((uint64_t)1 << (glsl_location + i)))
1157 processed = true;
1158 else
1159 processed_locs[var->data.index] |=
1160 ((uint64_t)1 << (glsl_location + i));
1161 }
1162 }
1163
1164       /* Because component packing allows varyings to share the same location,
1165        * we may have already processed this location.
1166 */
1167 if (processed) {
1168 unsigned driver_location = assigned_locations[var->data.location];
1169 var->data.driver_location = driver_location;
1170
1171          /* An array may be packed such that it crosses multiple other arrays
1172           * or variables, so we need to make sure we have allocated the elements
1173           * consecutively if the previously processed var was shorter than
1174 * the current array we are processing.
1175 *
1176 * NOTE: The code below assumes the var list is ordered in ascending
1177 * location order.
1178 */
1179 assert(last_loc <= var->data.location);
1180 last_loc = var->data.location;
1181 unsigned last_slot_location = driver_location + var_size;
1182 if (last_slot_location > location) {
1183 unsigned num_unallocated_slots = last_slot_location - location;
1184 unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1185 for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1186 assigned_locations[var->data.location + i] = location;
1187 location++;
1188 }
1189 }
1190 continue;
1191 }
1192
1193 for (unsigned i = 0; i < var_size; i++) {
1194 assigned_locations[var->data.location + i] = location + i;
1195 }
1196
1197 var->data.driver_location = location;
1198 location += var_size;
1199 }
1200
1201 if (last_partial)
1202 location++;
1203
1204 struct exec_list *var_list = nir_variable_list_for_mode(shader, mode);
1205 exec_list_append(var_list, &io_vars);
1206 *size = location;
1207 }
1208
1209 static uint64_t
1210 get_linked_variable_location(unsigned location, bool patch)
1211 {
1212 if (!patch)
1213 return location;
1214
1215 /* Reserve locations 0...3 for special patch variables
1216 * like tess factors and bounding boxes, and the generic patch
1217 * variables will come after them.
1218 */
1219 if (location >= VARYING_SLOT_PATCH0)
1220 return location - VARYING_SLOT_PATCH0 + 4;
1221 else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
1222 location <= VARYING_SLOT_BOUNDING_BOX1)
1223 return location - VARYING_SLOT_TESS_LEVEL_OUTER;
1224 else
1225 unreachable("Unsupported variable in get_linked_variable_location.");
1226 }
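/* With the usual gl_varying_slot ordering this maps, for example,
 * VARYING_SLOT_TESS_LEVEL_OUTER -> 0, VARYING_SLOT_TESS_LEVEL_INNER -> 1,
 * the two bounding-box slots -> 2 and 3, and VARYING_SLOT_PATCH2 -> 6, while
 * non-patch locations are returned unchanged.
 */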
1227
1228 static uint64_t
1229 get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
1230 {
1231 const struct glsl_type *type = variable->type;
1232
1233 if (nir_is_per_vertex_io(variable, stage)) {
1234 assert(glsl_type_is_array(type));
1235 type = glsl_get_array_element(type);
1236 }
1237
1238 unsigned slots = glsl_count_attribute_slots(type, false);
1239 if (variable->data.compact) {
1240 unsigned component_count = variable->data.location_frac + glsl_get_length(type);
1241 slots = DIV_ROUND_UP(component_count, 4);
1242 }
1243
1244 uint64_t mask = u_bit_consecutive64(0, slots);
1245 return mask;
1246 }
1247
1248 nir_linked_io_var_info
1249 nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
1250 {
1251 assert(producer);
1252 assert(consumer);
1253
1254 uint64_t producer_output_mask = 0;
1255 uint64_t producer_patch_output_mask = 0;
1256
1257 nir_foreach_shader_out_variable(variable, producer) {
1258 uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
1259 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1260
1261 if (variable->data.patch)
1262 producer_patch_output_mask |= mask << loc;
1263 else
1264 producer_output_mask |= mask << loc;
1265 }
1266
1267 uint64_t consumer_input_mask = 0;
1268 uint64_t consumer_patch_input_mask = 0;
1269
1270 nir_foreach_shader_in_variable(variable, consumer) {
1271 uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
1272 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1273
1274 if (variable->data.patch)
1275 consumer_patch_input_mask |= mask << loc;
1276 else
1277 consumer_input_mask |= mask << loc;
1278 }
1279
1280 uint64_t io_mask = producer_output_mask | consumer_input_mask;
1281 uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
1282
1283 nir_foreach_shader_out_variable(variable, producer) {
1284 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1285
1286 if (variable->data.patch)
1287 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4;
1288 else
1289 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4;
1290 }
1291
1292 nir_foreach_shader_in_variable(variable, consumer) {
1293 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1294
1295 if (variable->data.patch)
1296 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4;
1297 else
1298 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4;
1299 }
1300
1301 nir_linked_io_var_info result = {
1302 .num_linked_io_vars = util_bitcount64(io_mask),
1303 .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
1304 };
1305
1306 return result;
1307 }
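/* A small worked example of the driver_location computation above (mask
 * values assumed): if io_mask ends up with bits 1, 3 and 4 set, the variable
 * at linked location 3 gets driver_location = bitcount(bits below 3) * 4 =
 * 1 * 4 = 4, and the one at location 4 gets 2 * 4 = 8, i.e. each linked slot
 * is assigned four consecutive component locations in both stages.
 */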