nir: Record non-vector/scalar varyings as unmovable when compacting
[mesa.git] / src / compiler / nir / nir_linking_helpers.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28
29 /* This file contains various little helpers for doing simple linking in
30 * NIR. Eventually, we'll probably want a full-blown varying packing
31 * implementation in here. Right now, it just deletes unused things.
32 */
33
34 /**
35 * Returns the bits in the inputs_read, outputs_written, or
36 * system_values_read bitfield corresponding to this variable.
37 */
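/* Illustrative example: a non-patch output of type mat2x4 counts as two
 * attribute slots, so for a variable at VARYING_SLOT_VAR1 this returns
 * ((1ull << 2) - 1) << VARYING_SLOT_VAR1, i.e. the bits for VAR1 and VAR2.
 * Patch variables are rebased relative to VARYING_SLOT_PATCH0 so the mask
 * lines up with the patch-specific bitfields.
 */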
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41 if (var->data.location < 0)
42 return 0;
43
44 unsigned location = var->data.patch ?
45 var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46
47 assert(var->data.mode == nir_var_shader_in ||
48 var->data.mode == nir_var_shader_out ||
49 var->data.mode == nir_var_system_value);
50 assert(var->data.location >= 0);
51
52 const struct glsl_type *type = var->type;
53 if (nir_is_per_vertex_io(var, stage)) {
54 assert(glsl_type_is_array(type));
55 type = glsl_get_array_element(type);
56 }
57
58 unsigned slots = glsl_count_attribute_slots(type, false);
59 return ((1ull << slots) - 1) << location;
60 }
61
62 static void
63 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
64 {
65 nir_foreach_function(function, shader) {
66 if (!function->impl)
67 continue;
68
69 nir_foreach_block(block, function->impl) {
70 nir_foreach_instr(instr, block) {
71 if (instr->type != nir_instr_type_intrinsic)
72 continue;
73
74 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
75 if (intrin->intrinsic != nir_intrinsic_load_deref)
76 continue;
77
78 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
79 if (deref->mode != nir_var_shader_out)
80 continue;
81
82 nir_variable *var = nir_deref_instr_get_variable(deref);
83 if (var->data.patch) {
84 patches_read[var->data.location_frac] |=
85 get_variable_io_mask(var, shader->info.stage);
86 } else {
87 read[var->data.location_frac] |=
88 get_variable_io_mask(var, shader->info.stage);
89 }
90 }
91 }
92 }
93 }
94
95 /**
96 * Helper for removing unused shader I/O variables, by demoting them to global
97 * variables (which may then be dead-code eliminated).
98 *
99 * Example usage is:
100 *
101 * progress = nir_remove_unused_io_vars(producer,
102 * &producer->outputs,
103 * read, patches_read) ||
104 * progress;
105 *
106 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
107 * representing each .location_frac used. Note that for vector variables,
108 * only the first channel (.location_frac) is examined for deciding if the
109 * variable is used!
110 */
111 bool
112 nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
113 uint64_t *used_by_other_stage,
114 uint64_t *used_by_other_stage_patches)
115 {
116 bool progress = false;
117 uint64_t *used;
118
119 nir_foreach_variable_safe(var, var_list) {
120 if (var->data.patch)
121 used = used_by_other_stage_patches;
122 else
123 used = used_by_other_stage;
124
125 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
126 continue;
127
128 if (var->data.always_active_io)
129 continue;
130
131 if (var->data.explicit_xfb_buffer)
132 continue;
133
134 uint64_t other_stage = used[var->data.location_frac];
135
136 if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
137 /* This one is unused by the other stage, make it a global variable instead */
138 var->data.location = 0;
139 var->data.mode = nir_var_shader_temp;
140
141 exec_node_remove(&var->node);
142 exec_list_push_tail(&shader->globals, &var->node);
143
144 progress = true;
145 }
146 }
147
148 if (progress)
149 nir_fixup_deref_modes(shader);
150
151 return progress;
152 }
153
154 bool
155 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
156 {
157 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
158 assert(consumer->info.stage != MESA_SHADER_VERTEX);
159
160 uint64_t read[4] = { 0 }, written[4] = { 0 };
161 uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
162
163 nir_foreach_variable(var, &producer->outputs) {
164 if (var->data.patch) {
165 patches_written[var->data.location_frac] |=
166 get_variable_io_mask(var, producer->info.stage);
167 } else {
168 written[var->data.location_frac] |=
169 get_variable_io_mask(var, producer->info.stage);
170 }
171 }
172
173 nir_foreach_variable(var, &consumer->inputs) {
174 if (var->data.patch) {
175 patches_read[var->data.location_frac] |=
176 get_variable_io_mask(var, consumer->info.stage);
177 } else {
178 read[var->data.location_frac] |=
179 get_variable_io_mask(var, consumer->info.stage);
180 }
181 }
182
183 /* Each TCS invocation can read data written by other TCS invocations,
184 * so even if the outputs are not used by the TES we must also make
185 * sure they are not read by the TCS before demoting them to globals.
186 */
187 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
188 tcs_add_output_reads(producer, read, patches_read);
189
190 bool progress = false;
191 progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
192 patches_read);
193
194 progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
195 patches_written) || progress;
196
197 return progress;
198 }
199
200 static uint8_t
201 get_interp_type(nir_variable *var, const struct glsl_type *type,
202 bool default_to_smooth_interp)
203 {
204 if (glsl_type_is_integer(type))
205 return INTERP_MODE_FLAT;
206 else if (var->data.interpolation != INTERP_MODE_NONE)
207 return var->data.interpolation;
208 else if (default_to_smooth_interp)
209 return INTERP_MODE_SMOOTH;
210 else
211 return INTERP_MODE_NONE;
212 }
213
214 #define INTERPOLATE_LOC_SAMPLE 0
215 #define INTERPOLATE_LOC_CENTROID 1
216 #define INTERPOLATE_LOC_CENTER 2
217
218 static uint8_t
219 get_interp_loc(nir_variable *var)
220 {
221 if (var->data.sample)
222 return INTERPOLATE_LOC_SAMPLE;
223 else if (var->data.centroid)
224 return INTERPOLATE_LOC_CENTROID;
225 else
226 return INTERPOLATE_LOC_CENTER;
227 }
228
229 static bool
230 is_packing_supported_for_type(const struct glsl_type *type)
231 {
232 /* We ignore complex types such as arrays, matrices, structs and bit sizes
233 * other than 32-bit. All other vector types should have been split into
234 * scalar variables by the lower_io_to_scalar pass. The only exception
235 * should be OpenGL xfb varyings.
236 * TODO: add support for more complex types?
237 */
238 return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
239 }
240
241 struct assigned_comps
242 {
243 uint8_t comps;
244 uint8_t interp_type;
245 uint8_t interp_loc;
246 };
247
248 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
249 * algorithms this function just assigns them their existing locations for now.
250 * TODO: allow better packing of complex types.
251 */
252 static void
253 get_unmoveable_components_masks(struct exec_list *var_list,
254 struct assigned_comps *comps,
255 gl_shader_stage stage,
256 bool default_to_smooth_interp)
257 {
258 nir_foreach_variable_safe(var, var_list) {
259 assert(var->data.location >= 0);
260
261 /* Only remap things that aren't built-ins. */
262 if (var->data.location >= VARYING_SLOT_VAR0 &&
263 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
264
265 const struct glsl_type *type = var->type;
266 if (nir_is_per_vertex_io(var, stage)) {
267 assert(glsl_type_is_array(type));
268 type = glsl_get_array_element(type);
269 }
270
271 /* If we can pack this varying then don't mark the components as
272 * used.
273 */
274 if (is_packing_supported_for_type(type))
275 continue;
276
277 unsigned location = var->data.location - VARYING_SLOT_VAR0;
278
279 unsigned elements =
280 glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
281 glsl_get_vector_elements(glsl_without_array(type)) : 4;
282
283 bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
284 unsigned slots = glsl_count_attribute_slots(type, false);
285 unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
286 unsigned comps_slot2 = 0;
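/* Illustrative example of the dual-slot case below: a dvec3 with
 * location_frac == 0 fills all 4 components of its first slot (the x and y
 * doubles) and 2 components of the second slot (the z double), so
 * num_comps == 4 and comps_slot2 == (3 * 2) - 4 == 2.
 */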
287 for (unsigned i = 0; i < slots; i++) {
288 if (dual_slot) {
289 if (i & 1) {
290 comps[location + i].comps |= ((1 << comps_slot2) - 1);
291 } else {
292 unsigned num_comps = 4 - var->data.location_frac;
293 comps_slot2 = (elements * dmul) - num_comps;
294
295 /* Assume ARB_enhanced_layouts packing rules for doubles */
296 assert(var->data.location_frac == 0 ||
297 var->data.location_frac == 2);
298 assert(comps_slot2 <= 4);
299
300 comps[location + i].comps |=
301 ((1 << num_comps) - 1) << var->data.location_frac;
302 }
303 } else {
304 comps[location + i].comps |=
305 ((1 << (elements * dmul)) - 1) << var->data.location_frac;
306 }
307
308 comps[location + i].interp_type =
309 get_interp_type(var, type, default_to_smooth_interp);
310 comps[location + i].interp_loc = get_interp_loc(var);
311 }
312 }
313 }
314 }
315
316 struct varying_loc
317 {
318 uint8_t component;
319 uint32_t location;
320 };
321
322 static void
323 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
324 uint64_t slots_used_mask, unsigned num_slots)
325 {
326 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
327
328 slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
329 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
330 }
331
332 static void
333 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
334 {
335 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
336
337 slots_used[var->data.patch ? 1 : 0] |=
338 BITFIELD64_BIT(var->data.location - loc_offset + offset);
339 }
340
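/* Apply a previously computed remap table to every variable in var_list and
 * rewrite the given slots_used/out_slots_read masks (and their per-patch
 * counterparts) to match. The remap table is indexed by
 * [location - VARYING_SLOT_VAR0][location_frac]; an entry with a non-zero
 * location holds the new slot and component for that varying component.
 */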
341 static void
342 remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
343 struct varying_loc (*remap)[4],
344 uint64_t *slots_used, uint64_t *out_slots_read,
345 uint32_t *p_slots_used, uint32_t *p_out_slots_read)
346 {
347 uint64_t out_slots_read_tmp[2] = {0};
348 uint64_t slots_used_tmp[2] = {0};
349
350 /* We don't touch builtins so just copy the bitmask */
351 slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
352
353 nir_foreach_variable(var, var_list) {
354 assert(var->data.location >= 0);
355
356 /* Only remap things that aren't built-ins */
357 if (var->data.location >= VARYING_SLOT_VAR0 &&
358 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
359
360 const struct glsl_type *type = var->type;
361 if (nir_is_per_vertex_io(var, stage)) {
362 assert(glsl_type_is_array(type));
363 type = glsl_get_array_element(type);
364 }
365
366 unsigned num_slots = glsl_count_attribute_slots(type, false);
367 bool used_across_stages = false;
368 bool outputs_read = false;
369
370 unsigned location = var->data.location - VARYING_SLOT_VAR0;
371 struct varying_loc *new_loc = &remap[location][var->data.location_frac];
372
373 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
374 uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
375 uint64_t outs_used =
376 var->data.patch ? *p_out_slots_read : *out_slots_read;
377 uint64_t slots =
378 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
379
380 if (slots & used)
381 used_across_stages = true;
382
383 if (slots & outs_used)
384 outputs_read = true;
385
386 if (new_loc->location) {
387 var->data.location = new_loc->location;
388 var->data.location_frac = new_loc->component;
389 }
390
391 if (var->data.always_active_io) {
392 /* We can't apply link-time optimisations (specifically array
393 * splitting) to these, so we need to copy the existing mask;
394 * otherwise we will mess up the mask for things like partially
395 * marked arrays.
396 */
397 if (used_across_stages)
398 mark_all_used_slots(var, slots_used_tmp, used, num_slots);
399
400 if (outputs_read) {
401 mark_all_used_slots(var, out_slots_read_tmp, outs_used,
402 num_slots);
403 }
404 } else {
405 for (unsigned i = 0; i < num_slots; i++) {
406 if (used_across_stages)
407 mark_used_slot(var, slots_used_tmp, i);
408
409 if (outputs_read)
410 mark_used_slot(var, out_slots_read_tmp, i);
411 }
412 }
413 }
414 }
415
416 *slots_used = slots_used_tmp[0];
417 *out_slots_read = out_slots_read_tmp[0];
418 *p_slots_used = slots_used_tmp[1];
419 *p_out_slots_read = out_slots_read_tmp[1];
420 }
421
422 struct varying_component {
423 nir_variable *var;
424 uint8_t interp_type;
425 uint8_t interp_loc;
426 bool is_patch;
427 bool initialised;
428 };
429
430 static int
431 cmp_varying_component(const void *comp1_v, const void *comp2_v)
432 {
433 struct varying_component *comp1 = (struct varying_component *) comp1_v;
434 struct varying_component *comp2 = (struct varying_component *) comp2_v;
435
436 /* We want patches to be ordered at the end of the array */
437 if (comp1->is_patch != comp2->is_patch)
438 return comp1->is_patch ? 1 : -1;
439
440 /* We can only pack varyings with matching interpolation types, so group
441 * them together.
442 */
443 if (comp1->interp_type != comp2->interp_type)
444 return comp1->interp_type - comp2->interp_type;
445
446 /* Interpolation loc must match also. */
447 if (comp1->interp_loc != comp2->interp_loc)
448 return comp1->interp_loc - comp2->interp_loc;
449
450 /* If everything else matches just use the original location to sort */
451 return comp1->var->data.location - comp2->var->data.location;
452 }
453
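/* Build the array of consumer input components that are candidates for
 * packing. This happens in two passes: first each packable
 * (location, component) pair is assigned an index into the array, then the
 * shader's load/interpolate intrinsics are walked to record the
 * interpolation settings of the components that are actually used.
 */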
454 static void
455 gather_varying_component_info(nir_shader *consumer,
456 struct varying_component **varying_comp_info,
457 unsigned *varying_comp_info_size,
458 bool default_to_smooth_interp)
459 {
460 unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {0};
461 unsigned num_of_comps_to_pack = 0;
462
463 /* Count the number of varyings that can be packed and create a mapping
464 * of those varyings to the array we will pass to qsort.
465 */
466 nir_foreach_variable(var, &consumer->inputs) {
467
468 /* Only remap things that aren't builtins. */
469 if (var->data.location >= VARYING_SLOT_VAR0 &&
470 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
471
472 /* We can't repack xfb varyings. */
473 if (var->data.always_active_io)
474 continue;
475
476 const struct glsl_type *type = var->type;
477 if (nir_is_per_vertex_io(var, consumer->info.stage)) {
478 assert(glsl_type_is_array(type));
479 type = glsl_get_array_element(type);
480 }
481
482 if (!is_packing_supported_for_type(type))
483 continue;
484
485 unsigned loc = var->data.location - VARYING_SLOT_VAR0;
486 store_varying_info_idx[loc][var->data.location_frac] =
487 ++num_of_comps_to_pack;
488 }
489 }
490
491 *varying_comp_info_size = num_of_comps_to_pack;
492 *varying_comp_info = rzalloc_array(NULL, struct varying_component,
493 num_of_comps_to_pack);
494
495 nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
496
497 /* Walk over the shader and populate the varying component info array */
498 nir_foreach_block(block, impl) {
499 nir_foreach_instr(instr, block) {
500 if (instr->type != nir_instr_type_intrinsic)
501 continue;
502
503 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
504 if (intr->intrinsic != nir_intrinsic_load_deref &&
505 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
506 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
507 intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
508 continue;
509
510 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
511 if (deref->mode != nir_var_shader_in)
512 continue;
513
514 /* We only remap things that aren't builtins. */
515 nir_variable *in_var = nir_deref_instr_get_variable(deref);
516 if (in_var->data.location < VARYING_SLOT_VAR0)
517 continue;
518
519 unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
520 if (location >= MAX_VARYINGS_INCL_PATCH)
521 continue;
522
523 unsigned var_info_idx =
524 store_varying_info_idx[location][in_var->data.location_frac];
525 if (!var_info_idx)
526 continue;
527
528 struct varying_component *vc_info =
529 &(*varying_comp_info)[var_info_idx-1];
530
531 if (!vc_info->initialised) {
532 const struct glsl_type *type = in_var->type;
533 if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
534 assert(glsl_type_is_array(type));
535 type = glsl_get_array_element(type);
536 }
537
538 vc_info->var = in_var;
539 vc_info->interp_type =
540 get_interp_type(in_var, type, default_to_smooth_interp);
541 vc_info->interp_loc = get_interp_loc(in_var);
542 vc_info->is_patch = in_var->data.patch;
543 }
544 }
545 }
546 }
547
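/* Scan forward from *cursor for a free component with compatible
 * interpolation. For example, if the slot at *cursor already has components
 * x and y assigned with a matching interpolation type and location, the
 * varying component lands in z of the same slot; once all four components of
 * a slot are taken (or the interpolation doesn't match) we move on to the
 * next slot.
 */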
548 static void
549 assign_remap_locations(struct varying_loc (*remap)[4],
550 struct assigned_comps *assigned_comps,
551 struct varying_component *info,
552 unsigned *cursor, unsigned *comp,
553 unsigned max_location)
554 {
555 unsigned tmp_cursor = *cursor;
556 unsigned tmp_comp = *comp;
557
558 for (; tmp_cursor < max_location; tmp_cursor++) {
559
560 if (assigned_comps[tmp_cursor].comps) {
561 /* We can only pack varyings with matching interpolation types, and
562 * the interpolation location must also match.
563 * TODO: i965 can handle interpolation locations that don't match,
564 * but the radeonsi nir backend handles everything as vec4s and so
565 * expects this to be the same for all components. We could make this
566 * check driver specific or drop it if NIR ever becomes the only
567 * radeonsi backend.
568 */
569 if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
570 assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
571 tmp_comp = 0;
572 continue;
573 }
574
575 while (tmp_comp < 4 &&
576 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
577 tmp_comp++;
578 }
579 }
580
581 if (tmp_comp == 4) {
582 tmp_comp = 0;
583 continue;
584 }
585
586 unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
587
588 /* Once we have assigned a location mark it as used */
589 assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
590 assigned_comps[tmp_cursor].interp_type = info->interp_type;
591 assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
592
593 /* Assign remap location */
594 remap[location][info->var->data.location_frac].component = tmp_comp++;
595 remap[location][info->var->data.location_frac].location =
596 tmp_cursor + VARYING_SLOT_VAR0;
597
598 break;
599 }
600
601 *cursor = tmp_cursor;
602 *comp = tmp_comp;
603 }
604
605 /* If there are empty components in a slot, compact the remaining components
606 * as close to component 0 as possible. This will make it easier to fill the
607 * empty components with components from a different slot in a following pass.
608 */
609 static void
610 compact_components(nir_shader *producer, nir_shader *consumer,
611 struct assigned_comps *assigned_comps,
612 bool default_to_smooth_interp)
613 {
614 struct exec_list *input_list = &consumer->inputs;
615 struct exec_list *output_list = &producer->outputs;
616 struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
617 struct varying_component *varying_comp_info;
618 unsigned varying_comp_info_size;
619
620 /* Gather varying component info */
621 gather_varying_component_info(consumer, &varying_comp_info,
622 &varying_comp_info_size,
623 default_to_smooth_interp);
624
625 /* Sort varying components. */
626 qsort(varying_comp_info, varying_comp_info_size,
627 sizeof(struct varying_component), cmp_varying_component);
628
629 unsigned cursor = 0;
630 unsigned comp = 0;
631
632 /* Set the remap array based on the sorted components */
633 for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
634 struct varying_component *info = &varying_comp_info[i];
635
636 assert(info->is_patch || cursor < MAX_VARYING);
637 if (info->is_patch) {
638 /* The list should be sorted with all non-patch inputs first followed
639 * by patch inputs. When we hit our first patch input, we need to
640 * reset the cursor to MAX_VARYING so we put them in the right slot.
641 */
642 if (cursor < MAX_VARYING) {
643 cursor = MAX_VARYING;
644 comp = 0;
645 }
646
647 assign_remap_locations(remap, assigned_comps, info,
648 &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
649 } else {
650 assign_remap_locations(remap, assigned_comps, info,
651 &cursor, &comp, MAX_VARYING);
652
653 /* Check if we failed to assign a remap location. This can happen if,
654 * for example, there are a bunch of unmovable components with
655 * mismatching interpolation types, causing us to skip over locations
656 * that would have been useful for packing later components.
657 * The solution is to iterate over the locations again (this should
658 * happen very rarely in practice).
659 */
660 if (cursor == MAX_VARYING) {
661 cursor = 0;
662 comp = 0;
663 assign_remap_locations(remap, assigned_comps, info,
664 &cursor, &comp, MAX_VARYING);
665 }
666 }
667 }
668
669 ralloc_free(varying_comp_info);
670
671 uint64_t zero = 0;
672 uint32_t zero32 = 0;
673 remap_slots_and_components(input_list, consumer->info.stage, remap,
674 &consumer->info.inputs_read, &zero,
675 &consumer->info.patch_inputs_read, &zero32);
676 remap_slots_and_components(output_list, producer->info.stage, remap,
677 &producer->info.outputs_written,
678 &producer->info.outputs_read,
679 &producer->info.patch_outputs_written,
680 &producer->info.patch_outputs_read);
681 }
682
683 /* We assume that this has been called more-or-less directly after
684 * remove_unused_varyings. At this point, all of the varyings that we
685 * aren't going to be using have been completely removed and the
686 * inputs_read and outputs_written fields in nir_shader_info reflect
687 * this. Therefore, the total set of valid slots is the OR of the two
688 * sets of varyings; this accounts for varyings which one side may need
689 * to read/write even if the other doesn't. This can happen if, for
690 * instance, an array is used indirectly from one side causing it to be
691 * unsplittable but directly from the other.
692 */
693 void
694 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
695 bool default_to_smooth_interp)
696 {
697 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
698 assert(consumer->info.stage != MESA_SHADER_VERTEX);
699
700 struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {0};
701
702 get_unmoveable_components_masks(&producer->outputs, assigned_comps,
703 producer->info.stage,
704 default_to_smooth_interp);
705 get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
706 consumer->info.stage,
707 default_to_smooth_interp);
708
709 compact_components(producer, consumer, assigned_comps,
710 default_to_smooth_interp);
711 }
712
713 /*
714 * Mark XFB varyings as always_active_io in the consumer so the linking opts
715 * don't touch them.
716 */
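/* For example, if the producer has an xfb-captured output at
 * VARYING_SLOT_VAR3, the consumer input occupying that same slot is flagged
 * as always_active_io so the linking optimisations (unused-varying removal,
 * compaction) leave it in place and it stays consistent with the producer's
 * xfb output.
 */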
717 void
718 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
719 {
720 nir_variable *input_vars[MAX_VARYING] = { 0 };
721
722 nir_foreach_variable(var, &consumer->inputs) {
723 if (var->data.location >= VARYING_SLOT_VAR0 &&
724 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
725
726 unsigned location = var->data.location - VARYING_SLOT_VAR0;
727 input_vars[location] = var;
728 }
729 }
730
731 nir_foreach_variable(var, &producer->outputs) {
732 if (var->data.location >= VARYING_SLOT_VAR0 &&
733 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
734
735 if (!var->data.always_active_io)
736 continue;
737
738 unsigned location = var->data.location - VARYING_SLOT_VAR0;
739 if (input_vars[location]) {
740 input_vars[location]->data.always_active_io = true;
741 }
742 }
743 }
744 }
745
746 static bool
747 does_varying_match(nir_variable *out_var, nir_variable *in_var)
748 {
749 return in_var->data.location == out_var->data.location &&
750 in_var->data.location_frac == out_var->data.location_frac;
751 }
752
753 static nir_variable *
754 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
755 {
756 nir_foreach_variable(var, &consumer->inputs) {
757 if (does_varying_match(out_var, var))
758 return var;
759 }
760
761 return NULL;
762 }
763
764 static bool
765 can_replace_varying(nir_variable *out_var)
766 {
767 /* Skip types that require more complex handling.
768 * TODO: add support for these types.
769 */
770 if (glsl_type_is_array(out_var->type) ||
771 glsl_type_is_dual_slot(out_var->type) ||
772 glsl_type_is_matrix(out_var->type) ||
773 glsl_type_is_struct_or_ifc(out_var->type))
774 return false;
775
776 /* Limit this pass to scalars for now to keep things simple. Most varyings
777 * should have been lowered to scalars at this point anyway.
778 */
779 if (!glsl_type_is_scalar(out_var->type))
780 return false;
781
782 if (out_var->data.location < VARYING_SLOT_VAR0 ||
783 out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
784 return false;
785
786 return true;
787 }
788
789 static bool
790 replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
791 {
792 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
793
794 nir_builder b;
795 nir_builder_init(&b, impl);
796
797 nir_variable *out_var =
798 nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
799
800 bool progress = false;
801 nir_foreach_block(block, impl) {
802 nir_foreach_instr(instr, block) {
803 if (instr->type != nir_instr_type_intrinsic)
804 continue;
805
806 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
807 if (intr->intrinsic != nir_intrinsic_load_deref)
808 continue;
809
810 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
811 if (in_deref->mode != nir_var_shader_in)
812 continue;
813
814 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
815
816 if (!does_varying_match(out_var, in_var))
817 continue;
818
819 b.cursor = nir_before_instr(instr);
820
821 nir_load_const_instr *out_const =
822 nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
823
824 /* Add new const to replace the input */
825 nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
826 intr->dest.ssa.bit_size,
827 out_const->value);
828
829 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));
830
831 progress = true;
832 }
833 }
834
835 return progress;
836 }
837
838 static bool
839 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
840 nir_intrinsic_instr *dup_store_intr)
841 {
842 assert(input_var);
843
844 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
845
846 nir_builder b;
847 nir_builder_init(&b, impl);
848
849 nir_variable *dup_out_var =
850 nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
851
852 bool progress = false;
853 nir_foreach_block(block, impl) {
854 nir_foreach_instr(instr, block) {
855 if (instr->type != nir_instr_type_intrinsic)
856 continue;
857
858 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
859 if (intr->intrinsic != nir_intrinsic_load_deref)
860 continue;
861
862 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
863 if (in_deref->mode != nir_var_shader_in)
864 continue;
865
866 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
867
868 if (!does_varying_match(dup_out_var, in_var) ||
869 in_var->data.interpolation != input_var->data.interpolation ||
870 get_interp_loc(in_var) != get_interp_loc(input_var))
871 continue;
872
873 b.cursor = nir_before_instr(instr);
874
875 nir_ssa_def *load = nir_load_var(&b, input_var);
876 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
877
878 progress = true;
879 }
880 }
881
882 return progress;
883 }
884
885 bool
886 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
887 {
888 /* TODO: Add support for more shader stage combinations */
889 if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
890 (producer->info.stage != MESA_SHADER_VERTEX &&
891 producer->info.stage != MESA_SHADER_TESS_EVAL))
892 return false;
893
894 bool progress = false;
895
896 nir_function_impl *impl = nir_shader_get_entrypoint(producer);
897
898 struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
899
900 /* If we find a store in the last block of the producer we can be sure this
901 * is the only possible value for this output.
902 */
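/* For example, if the producer's last block stores a load_const (say 1.0) to
 * an output, every load of the matching input in the fragment shader is
 * replaced with that constant. If an output stores an SSA value that another
 * output is already known to store, loads of its matching input are instead
 * rewritten to load the other output's matching input (provided the
 * interpolation settings match).
 */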
903 nir_block *last_block = nir_impl_last_block(impl);
904 nir_foreach_instr_reverse(instr, last_block) {
905 if (instr->type != nir_instr_type_intrinsic)
906 continue;
907
908 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
909
910 if (intr->intrinsic != nir_intrinsic_store_deref)
911 continue;
912
913 nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
914 if (out_deref->mode != nir_var_shader_out)
915 continue;
916
917 nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
918 if (!can_replace_varying(out_var))
919 continue;
920
921 if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
922 progress |= replace_constant_input(consumer, intr);
923 } else {
924 struct hash_entry *entry =
925 _mesa_hash_table_search(varying_values, intr->src[1].ssa);
926 if (entry) {
927 progress |= replace_duplicate_input(consumer,
928 (nir_variable *) entry->data,
929 intr);
930 } else {
931 nir_variable *in_var = get_matching_input_var(consumer, out_var);
932 if (in_var) {
933 _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
934 in_var);
935 }
936 }
937 }
938 }
939
940 _mesa_hash_table_destroy(varying_values, NULL);
941
942 return progress;
943 }