nir: do not pack varying with different types
[mesa.git] / src / compiler / nir / nir_linking_helpers.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it just deletes unused things.
 */

/**
 * Returns the bits in the inputs_read, outputs_written, or
 * system_values_read bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out ||
          var->data.mode == nir_var_system_value);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_per_vertex_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}

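/* Accumulate the output slots (and patch output slots) that the TCS reads
 * back itself, so that cross-invocation reads are not lost when outputs
 * unused by the TES are demoted to globals.
 */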
static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (deref->mode != nir_var_shader_out)
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            if (var->data.patch) {
               patches_read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            } else {
               read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer,
 *                                      &producer->outputs,
 *                                      read, patches_read) ||
 *            progress;
 *
 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
 * representing each .location_frac used. Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.patch) {
         patches_written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      } else {
         written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.patch) {
         patches_read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      } else {
         read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}

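/* Pick the interpolation type we key packing decisions on: integers must be
 * flat, otherwise use the variable's declared mode, falling back to smooth
 * (or "none") when nothing was declared.
 */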
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32-bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

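/* Per-slot record of which components are already claimed by varyings we
 * will not move, along with the interpolation settings and bit size any new
 * occupant of the slot would have to match.
 */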
struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
};

/* Packing arrays and dual-slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing locations for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(struct exec_list *var_list,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit = glsl_type_is_32bit(type);
         }
      }
   }
}

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

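/* slots_used is a two-element array: index 0 tracks regular varying slots
 * and index 1 tracks patch slots (relative to VARYING_SLOT_PATCH0).
 */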
static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

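/* Apply the remap table to every non-built-in variable in var_list and
 * rebuild the shader-info slot bitmasks (slots used across stages, outputs
 * read, and their patch equivalents) to match the new locations.
 */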
static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask,
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   return comp1->var->data.location - comp2->var->data.location;
}

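/* Build an array describing every packable consumer input component (one
 * varying_component entry per location/component pair) by first scanning the
 * input variable list and then the load/interp intrinsics that use them.
 */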
static void
gather_varying_component_info(nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {0};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_variable(var, &consumer->inputs) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (deref->mode != nir_var_shader_in)
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx - 1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
         }
      }
   }
}

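/* Scan forward from *cursor / *comp for the first slot and component this
 * varying can legally share (matching interpolation type, interpolation
 * location and 32-bit-ness), record the choice in the remap table and in
 * assigned_comps, and advance the cursor/component for the next caller.
 */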
static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types,
          * interpolation loc must match also.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make this
          * check driver specific or drop it if NIR ever becomes the only
          * radeonsi backend.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}

/* If there are empty components in the slot compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen
          * if, for example, there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {0};

   get_unmoveable_components_masks(&producer->outputs, assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

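/* Two varyings "match" when they occupy the same location and component. */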
static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_variable(var, &consumer->inputs) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

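/* The producer stores a constant to this output, so rewrite every load of
 * the matching input in the consumer to use that constant directly.
 */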
static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}

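/* The producer stores the same SSA value to this output as to an earlier
 * output, and input_var is the consumer input matching that earlier output.
 * Rewrite loads of the duplicate input to read input_var instead, provided
 * the interpolation settings agree.
 */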
static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

         progress = true;
      }
   }

   return progress;
}

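/* Replace varyings whose producer-side value is a constant, or a duplicate
 * of another output, with that constant or the other varying on the consumer
 * side. Only stores in the producer's last block are considered, since those
 * are the only ones guaranteed to provide the final output value.
 *
 * An illustrative (not prescriptive) linker call sequence might be:
 *
 *    progress |= nir_link_opt_varyings(producer, consumer);
 *    progress |= nir_remove_unused_varyings(producer, consumer);
 *    nir_compact_varyings(producer, consumer, true);
 */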
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}