2 * Copyright © 2019 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "nir_builder.h"
26 #include "nir_deref.h"
28 /** @file nir_lower_io_to_vector.c
30 * Merges compatible input/output variables residing in different components
31 * of the same location. It's expected that further passes such as
32 * nir_lower_io_to_temporaries will combine loads and stores of the merged
33 * variables, producing vector nir_load_input/nir_store_output instructions
34 * when all is said and done.
/* FRAG_RESULT_MAX+1 instead of just FRAG_RESULT_MAX because of how this pass
 * handles dual source blending */
#define MAX_SLOTS MAX2(VARYING_SLOT_TESS_MAX, FRAG_RESULT_MAX+1)
42 get_slot(const nir_variable
*var
)
44 /* This handling of dual-source blending might not be correct when more than
45 * one render target is supported, but it seems no driver supports more than
47 return var
->data
.location
+ var
->data
.index
;
50 static const struct glsl_type
*
51 get_per_vertex_type(const nir_shader
*shader
, const nir_variable
*var
,
52 unsigned *num_vertices
)
54 if (nir_is_per_vertex_io(var
, shader
->info
.stage
)) {
55 assert(glsl_type_is_array(var
->type
));
57 *num_vertices
= glsl_get_length(var
->type
);
58 return glsl_get_array_element(var
->type
);
/* Rebuilds an (arbitrarily nested array of) vector/scalar type with the
 * vector width replaced by num_components, preserving array dimensions and
 * the base type.
 */
static const struct glsl_type *
resize_array_vec_type(const struct glsl_type *type, unsigned num_components)
{
   if (glsl_type_is_array(type)) {
      const struct glsl_type *arr_elem =
         resize_array_vec_type(glsl_get_array_element(type), num_components);
      return glsl_array_type(arr_elem, glsl_get_length(type), 0);
   } else {
      assert(glsl_type_is_vector_or_scalar(type));
      return glsl_vector_type(glsl_get_base_type(type), num_components);
   }
}
80 variables_can_merge(const nir_shader
*shader
,
81 const nir_variable
*a
, const nir_variable
*b
,
82 bool same_array_structure
)
84 if (a
->data
.compact
|| b
->data
.compact
)
87 if (a
->data
.per_view
|| b
->data
.per_view
)
90 const struct glsl_type
*a_type_tail
= a
->type
;
91 const struct glsl_type
*b_type_tail
= b
->type
;
93 if (nir_is_per_vertex_io(a
, shader
->info
.stage
) !=
94 nir_is_per_vertex_io(b
, shader
->info
.stage
))
97 /* They must have the same array structure */
98 if (same_array_structure
) {
99 while (glsl_type_is_array(a_type_tail
)) {
100 if (!glsl_type_is_array(b_type_tail
))
103 if (glsl_get_length(a_type_tail
) != glsl_get_length(b_type_tail
))
106 a_type_tail
= glsl_get_array_element(a_type_tail
);
107 b_type_tail
= glsl_get_array_element(b_type_tail
);
109 if (glsl_type_is_array(b_type_tail
))
112 a_type_tail
= glsl_without_array(a_type_tail
);
113 b_type_tail
= glsl_without_array(b_type_tail
);
116 if (!glsl_type_is_vector_or_scalar(a_type_tail
) ||
117 !glsl_type_is_vector_or_scalar(b_type_tail
))
120 if (glsl_get_base_type(a_type_tail
) != glsl_get_base_type(b_type_tail
))
123 /* TODO: add 64/16bit support ? */
124 if (glsl_get_bit_size(a_type_tail
) != 32)
127 assert(a
->data
.mode
== b
->data
.mode
);
128 if (shader
->info
.stage
== MESA_SHADER_FRAGMENT
&&
129 a
->data
.mode
== nir_var_shader_in
&&
130 a
->data
.interpolation
!= b
->data
.interpolation
)
133 if (shader
->info
.stage
== MESA_SHADER_FRAGMENT
&&
134 a
->data
.mode
== nir_var_shader_out
&&
135 a
->data
.index
!= b
->data
.index
)
138 /* It's tricky to merge XFB-outputs correctly, because we need there
139 * to not be any overlaps when we get to
140 * nir_gather_xfb_info_with_varyings later on. We'll end up
141 * triggering an assert there if we merge here.
143 if ((shader
->info
.stage
== MESA_SHADER_VERTEX
||
144 shader
->info
.stage
== MESA_SHADER_TESS_EVAL
||
145 shader
->info
.stage
== MESA_SHADER_GEOMETRY
) &&
146 a
->data
.mode
== nir_var_shader_out
&&
147 (a
->data
.explicit_xfb_buffer
|| b
->data
.explicit_xfb_buffer
))
153 static const struct glsl_type
*
154 get_flat_type(const nir_shader
*shader
, nir_variable
*old_vars
[MAX_SLOTS
][4],
155 unsigned *loc
, nir_variable
**first_var
, unsigned *num_vertices
)
159 unsigned num_vars
= 0;
160 enum glsl_base_type base
;
165 assert(*loc
< MAX_SLOTS
);
166 for (unsigned frac
= 0; frac
< 4; frac
++) {
167 nir_variable
*var
= old_vars
[*loc
][frac
];
171 !variables_can_merge(shader
, var
, *first_var
, false)) ||
178 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var
->type
))) {
183 base
= glsl_get_base_type(
184 glsl_without_array(get_per_vertex_type(shader
, var
, NULL
)));
187 bool vs_in
= shader
->info
.stage
== MESA_SHADER_VERTEX
&&
188 var
->data
.mode
== nir_var_shader_in
;
189 unsigned var_slots
= glsl_count_attribute_slots(
190 get_per_vertex_type(shader
, var
, num_vertices
), vs_in
);
191 todo
= MAX2(todo
, var_slots
);
203 return glsl_vector_type(base
, 4);
205 return glsl_array_type(glsl_vector_type(base
, 4), slots
, 0);
209 create_new_io_vars(nir_shader
*shader
, nir_variable_mode mode
,
210 nir_variable
*new_vars
[MAX_SLOTS
][4],
211 bool flat_vars
[MAX_SLOTS
])
213 nir_variable
*old_vars
[MAX_SLOTS
][4] = {{0}};
215 bool has_io_var
= false;
216 nir_foreach_variable_with_modes(var
, shader
, mode
) {
217 unsigned frac
= var
->data
.location_frac
;
218 old_vars
[get_slot(var
)][frac
] = var
;
225 bool merged_any_vars
= false;
227 for (unsigned loc
= 0; loc
< MAX_SLOTS
; loc
++) {
230 nir_variable
*first_var
= old_vars
[loc
][frac
];
237 bool found_merge
= false;
240 nir_variable
*var
= old_vars
[loc
][frac
];
244 if (var
!= first_var
) {
245 if (!variables_can_merge(shader
, first_var
, var
, true))
251 const unsigned num_components
=
252 glsl_get_components(glsl_without_array(var
->type
));
253 if (!num_components
) {
256 break; /* The type was a struct. */
259 /* We had better not have any overlapping vars */
260 for (unsigned i
= 1; i
< num_components
; i
++)
261 assert(old_vars
[loc
][frac
+ i
] == NULL
);
263 frac
+= num_components
;
269 merged_any_vars
= true;
271 nir_variable
*var
= nir_variable_clone(old_vars
[loc
][first
], shader
);
272 var
->data
.location_frac
= first
;
273 var
->type
= resize_array_vec_type(var
->type
, frac
- first
);
275 nir_shader_add_variable(shader
, var
);
276 for (unsigned i
= first
; i
< frac
; i
++) {
277 new_vars
[loc
][i
] = var
;
278 old_vars
[loc
][i
] = NULL
;
281 old_vars
[loc
][first
] = var
;
285 /* "flat" mode: tries to ensure there is at most one variable per slot by
286 * merging variables into vec4s
288 for (unsigned loc
= 0; loc
< MAX_SLOTS
;) {
289 nir_variable
*first_var
;
290 unsigned num_vertices
;
291 unsigned new_loc
= loc
;
292 const struct glsl_type
*flat_type
=
293 get_flat_type(shader
, old_vars
, &new_loc
, &first_var
, &num_vertices
);
295 merged_any_vars
= true;
297 nir_variable
*var
= nir_variable_clone(first_var
, shader
);
298 var
->data
.location_frac
= 0;
300 var
->type
= glsl_array_type(flat_type
, num_vertices
, 0);
302 var
->type
= flat_type
;
304 nir_shader_add_variable(shader
, var
);
305 for (unsigned i
= 0; i
< glsl_get_length(flat_type
); i
++) {
306 for (unsigned j
= 0; j
< 4; j
++)
307 new_vars
[loc
+ i
][j
] = var
;
308 flat_vars
[loc
+ i
] = true;
314 return merged_any_vars
;
317 static nir_deref_instr
*
318 build_array_deref_of_new_var(nir_builder
*b
, nir_variable
*new_var
,
319 nir_deref_instr
*leader
)
321 if (leader
->deref_type
== nir_deref_type_var
)
322 return nir_build_deref_var(b
, new_var
);
324 nir_deref_instr
*parent
=
325 build_array_deref_of_new_var(b
, new_var
, nir_deref_instr_parent(leader
));
327 return nir_build_deref_follower(b
, parent
, leader
);
331 build_array_index(nir_builder
*b
, nir_deref_instr
*deref
, nir_ssa_def
*base
,
334 switch (deref
->deref_type
) {
335 case nir_deref_type_var
:
337 case nir_deref_type_array
: {
338 nir_ssa_def
*index
= nir_i2i(b
, deref
->arr
.index
.ssa
,
339 deref
->dest
.ssa
.bit_size
);
341 b
, build_array_index(b
, nir_deref_instr_parent(deref
), base
, vs_in
),
342 nir_amul_imm(b
, index
, glsl_count_attribute_slots(deref
->type
, vs_in
)));
345 unreachable("Invalid deref instruction type");
349 static nir_deref_instr
*
350 build_array_deref_of_new_var_flat(nir_shader
*shader
,
351 nir_builder
*b
, nir_variable
*new_var
,
352 nir_deref_instr
*leader
, unsigned base
)
354 nir_deref_instr
*deref
= nir_build_deref_var(b
, new_var
);
356 if (nir_is_per_vertex_io(new_var
, shader
->info
.stage
)) {
357 assert(leader
->deref_type
== nir_deref_type_array
);
358 nir_ssa_def
*index
= leader
->arr
.index
.ssa
;
359 leader
= nir_deref_instr_parent(leader
);
360 deref
= nir_build_deref_array(b
, deref
, index
);
363 if (!glsl_type_is_array(deref
->type
))
366 bool vs_in
= shader
->info
.stage
== MESA_SHADER_VERTEX
&&
367 new_var
->data
.mode
== nir_var_shader_in
;
368 return nir_build_deref_array(
369 b
, deref
, build_array_index(b
, leader
, nir_imm_int(b
, base
), vs_in
));
373 nir_lower_io_to_vector_impl(nir_function_impl
*impl
, nir_variable_mode modes
)
375 assert(!(modes
& ~(nir_var_shader_in
| nir_var_shader_out
)));
378 nir_builder_init(&b
, impl
);
380 nir_metadata_require(impl
, nir_metadata_dominance
);
382 nir_shader
*shader
= impl
->function
->shader
;
383 nir_variable
*new_inputs
[MAX_SLOTS
][4] = {{0}};
384 nir_variable
*new_outputs
[MAX_SLOTS
][4] = {{0}};
385 bool flat_inputs
[MAX_SLOTS
] = {0};
386 bool flat_outputs
[MAX_SLOTS
] = {0};
388 if (modes
& nir_var_shader_in
) {
389 /* Vertex shaders support overlapping inputs. We don't do those */
390 assert(b
.shader
->info
.stage
!= MESA_SHADER_VERTEX
);
392 /* If we don't actually merge any variables, remove that bit from modes
393 * so we don't bother doing extra non-work.
395 if (!create_new_io_vars(shader
, nir_var_shader_in
,
396 new_inputs
, flat_inputs
))
397 modes
&= ~nir_var_shader_in
;
400 if (modes
& nir_var_shader_out
) {
401 /* If we don't actually merge any variables, remove that bit from modes
402 * so we don't bother doing extra non-work.
404 if (!create_new_io_vars(shader
, nir_var_shader_out
,
405 new_outputs
, flat_outputs
))
406 modes
&= ~nir_var_shader_out
;
412 bool progress
= false;
414 /* Actually lower all the IO load/store intrinsics. Load instructions are
415 * lowered to a vector load and an ALU instruction to grab the channels we
416 * want. Outputs are lowered to a write-masked store of the vector output.
417 * For non-TCS outputs, we then run nir_lower_io_to_temporaries at the end
418 * to clean up the partial writes.
420 nir_foreach_block(block
, impl
) {
421 nir_foreach_instr_safe(instr
, block
) {
422 if (instr
->type
!= nir_instr_type_intrinsic
)
425 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
427 switch (intrin
->intrinsic
) {
428 case nir_intrinsic_load_deref
:
429 case nir_intrinsic_interp_deref_at_centroid
:
430 case nir_intrinsic_interp_deref_at_sample
:
431 case nir_intrinsic_interp_deref_at_offset
:
432 case nir_intrinsic_interp_deref_at_vertex
: {
433 nir_deref_instr
*old_deref
= nir_src_as_deref(intrin
->src
[0]);
434 if (!(old_deref
->mode
& modes
))
437 if (old_deref
->mode
== nir_var_shader_out
)
438 assert(b
.shader
->info
.stage
== MESA_SHADER_TESS_CTRL
||
439 b
.shader
->info
.stage
== MESA_SHADER_FRAGMENT
);
441 nir_variable
*old_var
= nir_deref_instr_get_variable(old_deref
);
443 const unsigned loc
= get_slot(old_var
);
444 const unsigned old_frac
= old_var
->data
.location_frac
;
445 nir_variable
*new_var
= old_deref
->mode
== nir_var_shader_in
?
446 new_inputs
[loc
][old_frac
] :
447 new_outputs
[loc
][old_frac
];
448 bool flat
= old_deref
->mode
== nir_var_shader_in
?
449 flat_inputs
[loc
] : flat_outputs
[loc
];
453 const unsigned new_frac
= new_var
->data
.location_frac
;
455 nir_component_mask_t vec4_comp_mask
=
456 ((1 << intrin
->num_components
) - 1) << old_frac
;
458 b
.cursor
= nir_before_instr(&intrin
->instr
);
460 /* Rewrite the load to use the new variable and only select a
461 * portion of the result.
463 nir_deref_instr
*new_deref
;
465 new_deref
= build_array_deref_of_new_var_flat(
466 shader
, &b
, new_var
, old_deref
, loc
- get_slot(new_var
));
468 assert(get_slot(new_var
) == loc
);
469 new_deref
= build_array_deref_of_new_var(&b
, new_var
, old_deref
);
470 assert(glsl_type_is_vector(new_deref
->type
));
472 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[0],
473 nir_src_for_ssa(&new_deref
->dest
.ssa
));
475 intrin
->num_components
=
476 glsl_get_components(new_deref
->type
);
477 intrin
->dest
.ssa
.num_components
= intrin
->num_components
;
479 b
.cursor
= nir_after_instr(&intrin
->instr
);
481 nir_ssa_def
*new_vec
= nir_channels(&b
, &intrin
->dest
.ssa
,
482 vec4_comp_mask
>> new_frac
);
483 nir_ssa_def_rewrite_uses_after(&intrin
->dest
.ssa
,
484 nir_src_for_ssa(new_vec
),
485 new_vec
->parent_instr
);
491 case nir_intrinsic_store_deref
: {
492 nir_deref_instr
*old_deref
= nir_src_as_deref(intrin
->src
[0]);
493 if (old_deref
->mode
!= nir_var_shader_out
)
496 nir_variable
*old_var
= nir_deref_instr_get_variable(old_deref
);
498 const unsigned loc
= get_slot(old_var
);
499 const unsigned old_frac
= old_var
->data
.location_frac
;
500 nir_variable
*new_var
= new_outputs
[loc
][old_frac
];
501 bool flat
= flat_outputs
[loc
];
505 const unsigned new_frac
= new_var
->data
.location_frac
;
507 b
.cursor
= nir_before_instr(&intrin
->instr
);
509 /* Rewrite the store to be a masked store to the new variable */
510 nir_deref_instr
*new_deref
;
512 new_deref
= build_array_deref_of_new_var_flat(
513 shader
, &b
, new_var
, old_deref
, loc
- get_slot(new_var
));
515 assert(get_slot(new_var
) == loc
);
516 new_deref
= build_array_deref_of_new_var(&b
, new_var
, old_deref
);
517 assert(glsl_type_is_vector(new_deref
->type
));
519 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[0],
520 nir_src_for_ssa(&new_deref
->dest
.ssa
));
522 intrin
->num_components
=
523 glsl_get_components(new_deref
->type
);
525 nir_component_mask_t old_wrmask
= nir_intrinsic_write_mask(intrin
);
527 assert(intrin
->src
[1].is_ssa
);
528 nir_ssa_def
*old_value
= intrin
->src
[1].ssa
;
529 nir_ssa_def
*comps
[4];
530 for (unsigned c
= 0; c
< intrin
->num_components
; c
++) {
531 if (new_frac
+ c
>= old_frac
&&
532 (old_wrmask
& 1 << (new_frac
+ c
- old_frac
))) {
533 comps
[c
] = nir_channel(&b
, old_value
,
534 new_frac
+ c
- old_frac
);
536 comps
[c
] = nir_ssa_undef(&b
, old_value
->num_components
,
537 old_value
->bit_size
);
540 nir_ssa_def
*new_value
= nir_vec(&b
, comps
, intrin
->num_components
);
541 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[1],
542 nir_src_for_ssa(new_value
));
544 nir_intrinsic_set_write_mask(intrin
,
545 old_wrmask
<< (old_frac
- new_frac
));
558 nir_metadata_preserve(impl
, nir_metadata_block_index
|
559 nir_metadata_dominance
);
566 nir_lower_io_to_vector(nir_shader
*shader
, nir_variable_mode modes
)
568 bool progress
= false;
570 nir_foreach_function(function
, shader
) {
572 progress
|= nir_lower_io_to_vector_impl(function
->impl
, modes
);