nir: fix packing components with arrays
[mesa.git] / src / compiler / nir / nir_linking_helpers.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it removes unused varyings, compacts
 * the varying components that remain and performs a few other simple
 * link-time optimisations.
 */

/**
 * Returns the bits in the inputs_read, outputs_written, or
 * system_values_read bitfield corresponding to this variable.
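 *
 * Illustrative example (not from the original comment): a non-patch vec4
 * at VARYING_SLOT_VAR2 occupies a single slot, so this returns
 * 1ull << VARYING_SLOT_VAR2, while a mat4 at the same location occupies
 * four slots and returns 0xfull << VARYING_SLOT_VAR2.  Patch varyings are
 * shifted relative to VARYING_SLOT_PATCH0 instead.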
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out ||
          var->data.mode == nir_var_system_value);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_per_vertex_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}

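/* Accumulate, per location_frac, the output slots that the TCS reads back
 * itself via load_deref of a shader_out variable.  Such outputs must stay
 * live even if the TES never reads them.
 */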
static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (deref->mode != nir_var_shader_out)
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            if (var->data.patch) {
               patches_read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            } else {
               read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead-code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer,
 *                                      &producer->outputs,
 *                                      read, patches_read) ||
 *            progress;
 *
 * The "used" arguments should be arrays of 4 uint64_ts (probably of
 * VARYING_BIT_*), indexed by .location_frac. Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is unused by the other stage, make it a global instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.patch) {
         patches_written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      } else {
         written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.patch) {
         patches_read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      } else {
         read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}

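/* Interpolation mode that will actually be used for a varying: integers are
 * always flat; otherwise use the declared qualifier, falling back to smooth
 * or "none" depending on what the driver asked for.
 */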
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32 bits. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
};

/* Packing arrays and dual-slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(struct exec_list *var_list,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
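         /* Worked example (illustrative only): a dvec3 with
          * location_frac == 0 has elements == 3, dmul == 2 and
          * dual_slot == true.  The even slot gets num_comps == 4, so its
          * comps mask is 0xf, and comps_slot2 == 3 * 2 - 4 == 2, so the
          * following odd slot gets comps 0x3.  This matches the
          * ARB_enhanced_layouts rule that a double consumes two components.
          */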
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
         }
      }
   }
}

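/* New location/component assigned to a packed component.  A location of 0
 * (VARYING_SLOT_POS, never a remap target) means the component was not
 * remapped.
 */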
struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link-time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask;
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   return comp1->var->data.location - comp2->var->data.location;
}

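/* Count the consumer input components that can be packed and fill in a
 * varying_component entry for each one with the interpolation information
 * needed to sort compatible components next to each other.
 */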
static void
gather_varying_component_info(nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {0};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_variable(var, &consumer->inputs) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (deref->mode != nir_var_shader_in)
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx - 1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
         }
      }
   }
}

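/* Greedily scan the assigned_comps array from the current cursor for the
 * first slot with a free component whose interpolation type/loc and bit
 * size are compatible with this component, then record the new
 * location/component in the remap table and mark it as used.
 */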
static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types,
          * and the interpolation loc must match as well.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make this
          * check driver specific or drop it if NIR ever becomes the only
          * radeonsi backend.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}

/* If there are empty components in the slot, compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen if
          * for example there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {0};

   get_unmoveable_components_masks(&producer->outputs, assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}
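
/* Illustrative sketch of how a driver linker might combine these helpers
 * (assumes the varyings were already split to scalars, e.g. by the
 * lower_io_to_scalar pass mentioned earlier; exact call sites and cleanup
 * passes vary per driver):
 *
 *    if (nir_remove_unused_varyings(producer, consumer)) {
 *       // demoted vars are now globals; lower them so normal cleanup
 *       // passes can delete them
 *       nir_lower_global_vars_to_local(producer);
 *       nir_lower_global_vars_to_local(consumer);
 *    }
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 */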

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_variable(var, &consumer->inputs) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

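/* Rewrite every load of the consumer input that matches the given output
 * store with the constant value being stored, so the consumer no longer
 * depends on the varying.
 */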
static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}

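/* The output being stored here duplicates a value that already reaches the
 * consumer through input_var.  Rewrite loads of the matching consumer input
 * to load input_var instead, provided the interpolation qualifiers agree.
 */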
static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

         progress = true;
      }
   }

   return progress;
}

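/* Forward constant and duplicate producer outputs straight into the
 * consumer: if the last block of the producer stores a load_const (or a
 * value already stored to another output with a matching consumer input)
 * to a scalar varying, rewrite the corresponding loads in the consumer,
 * leaving the varying to be cleaned up by later dead-variable and
 * unused-varying passes.
 */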
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}