nir: rewrite varying component packing
src/compiler/nir/nir_linking_helpers.c (mesa.git)
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR.  Besides deleting unused things, it packs varying components into
 * as few slots as possible and replaces consumer loads of varyings that
 * the producer writes as constants or as duplicates of other varyings.
 */

/**
 * Returns the bits in the inputs_read, outputs_written, or
 * system_values_read bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out ||
          var->data.mode == nir_var_system_value);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_per_vertex_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}

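/* Accumulates, per location_frac, bitmasks of the output slots that the TCS
 * reads back itself, so that such outputs are not treated as unused just
 * because the TES does not read them.
 */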
static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (deref->mode != nir_var_shader_out)
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            if (var->data.patch) {
               patches_read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            } else {
               read[var->data.location_frac] |=
                  get_variable_io_mask(var, shader->info.stage);
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer,
 *                                      &producer->outputs,
 *                                      read, patches_read) ||
 *            progress;
 *
 * The "used" masks should each be an array of 4 uint64_ts (of VARYING_BIT_*
 * bits), one per possible .location_frac.  Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is not used by the other stage, so make it a global
          * variable instead and let it be dead code eliminated.
          */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

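/* Removes producer outputs and consumer inputs that are not used by the
 * other stage, taking care to keep TCS outputs that the TCS itself reads
 * back.
 */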
bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.patch) {
         patches_written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      } else {
         written[var->data.location_frac] |=
            get_variable_io_mask(var, producer->info.stage);
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.patch) {
         patches_read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      } else {
         read[var->data.location_frac] |=
            get_variable_io_mask(var, consumer->info.stage);
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}

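/* Returns the interpolation mode to use for packing purposes: integers are
 * always flat, an explicit qualifier on the variable takes precedence, and
 * otherwise the result depends on default_to_smooth_interp.
 */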
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bitsizes
    * other than 32bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

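/* Tracks which components of a location are already in use, along with the
 * interpolation type and location that anything else packed into that
 * location must match.
 */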
struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
};

/* Packing arrays and dual slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(struct exec_list *var_list,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         unsigned elements =
            glsl_get_vector_elements(glsl_without_array(type));

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

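                  /* For example, a dvec3 at location_frac 0 has
                   * elements * dmul == 6 components in total: num_comps == 4
                   * of them land in the first slot and comps_slot2 == 2
                   * spill into the second slot.
                   */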
                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
         }
      }
   }
}

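/* The new location and component that a packed varying gets assigned. */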
struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

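/* ORs into the used-slots bitmask (patch or non-patch, as appropriate) the
 * bits of slots_used_mask that cover this variable's slots.
 */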
static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

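/* Applies the remap table to every variable in var_list and rebuilds the
 * slots_used/out_slots_read bitmasks (and their patch equivalents) so they
 * match the new locations.
 */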
static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins, so just copy the bitmask. */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask,
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

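/* Information about a single packable input component of the consumer,
 * gathered so the components can be sorted and then assigned new locations.
 */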
struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_patch;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We can only pack varyings with matching interpolation types, so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* The interpolation loc must also match. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   return comp1->var->data.location - comp2->var->data.location;
}

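/* Builds an array of varying_component entries for the consumer's packable
 * input components, filling in each entry from the load/interpolation
 * intrinsics that reference it.
 */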
static void
gather_varying_component_info(nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {0};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_variable(var, &consumer->inputs) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (deref->mode != nir_var_shader_in)
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx - 1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_patch = in_var->data.patch;
         }
      }
   }
}

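/* Finds the first location at or after *cursor with a free component whose
 * interpolation settings match info, records the assignment in the remap
 * table and marks the component as used in assigned_comps.  If no location
 * is found before max_location, the remap entry is left untouched.
 */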
static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types,
          * and the interpolation loc must match as well.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make this
          * check driver specific or drop it if NIR ever becomes the only
          * radeonsi backend.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}

/* If there are empty components in the slot, compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen
          * if, for example, there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {0};

   get_unmoveable_components_masks(&producer->outputs, assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

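/* Returns true if the producer output and consumer input occupy the same
 * location and component.
 */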
static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_variable(var, &consumer->inputs) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

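/* Rewrites every load of the consumer input that matches the output written
 * by store_intr so it uses the constant value being stored, leaving the
 * input itself unused.
 */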
static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}

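/* Rewrites loads of the consumer input fed by dup_store_intr to load
 * input_var instead, provided the interpolation settings match, since both
 * inputs carry the same value from the producer.
 */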
static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

         progress = true;
      }
   }

   return progress;
}

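/* Replaces consumer loads of varyings whose producer value is provably a
 * constant, or a duplicate of another output, based on the stores found in
 * the producer's last block.
 */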
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer, we can be sure
    * this is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}