2 * Copyright © 2019 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
25 #include "nir_builder.h"
26 #include "nir_deref.h"
28 /** @file nir_lower_io_to_vector.c
30 * Merges compatible input/output variables residing in different components
31 * of the same location. It's expected that further passes such as
32 * nir_lower_io_to_temporaries will combine loads and stores of the merged
33 * variables, producing vector nir_load_input/nir_store_output instructions
34 * when all is said and done.
37 static const struct glsl_type
*
38 resize_array_vec_type(const struct glsl_type
*type
, unsigned num_components
)
40 if (glsl_type_is_array(type
)) {
41 const struct glsl_type
*arr_elem
=
42 resize_array_vec_type(glsl_get_array_element(type
), num_components
);
43 return glsl_array_type(arr_elem
, glsl_get_length(type
), 0);
45 assert(glsl_type_is_vector_or_scalar(type
));
46 return glsl_vector_type(glsl_get_base_type(type
), num_components
);
51 variable_can_rewrite(const nir_variable
*var
)
53 /* Only touch user defined varyings as these are the only ones we split */
54 if (var
->data
.location
< VARYING_SLOT_VAR0
)
57 /* Skip complex types we don't split in the first place */
58 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var
->type
)))
61 /* TODO: add 64/16bit support ? */
62 if (glsl_get_bit_size(glsl_without_array(var
->type
)) != 32)
69 variables_can_merge(nir_shader
*shader
,
70 const nir_variable
*a
, const nir_variable
*b
)
72 const struct glsl_type
*a_type_tail
= a
->type
;
73 const struct glsl_type
*b_type_tail
= b
->type
;
75 /* They must have the same array structure */
76 while (glsl_type_is_array(a_type_tail
)) {
77 if (!glsl_type_is_array(b_type_tail
))
80 if (glsl_get_length(a_type_tail
) != glsl_get_length(b_type_tail
))
83 a_type_tail
= glsl_get_array_element(a_type_tail
);
84 b_type_tail
= glsl_get_array_element(b_type_tail
);
87 if (!glsl_type_is_vector_or_scalar(a_type_tail
) ||
88 !glsl_type_is_vector_or_scalar(b_type_tail
))
91 if (glsl_get_base_type(a
->type
) != glsl_get_base_type(b
->type
))
94 assert(a
->data
.mode
== b
->data
.mode
);
95 if (shader
->info
.stage
== MESA_SHADER_FRAGMENT
&&
96 a
->data
.mode
== nir_var_shader_in
&&
97 a
->data
.interpolation
!= b
->data
.interpolation
)
104 create_new_io_vars(nir_shader
*shader
, struct exec_list
*io_list
,
105 nir_variable
*old_vars
[MAX_VARYINGS_INCL_PATCH
][4],
106 nir_variable
*new_vars
[MAX_VARYINGS_INCL_PATCH
][4])
108 if (exec_list_is_empty(io_list
))
111 nir_foreach_variable(var
, io_list
) {
112 if (variable_can_rewrite(var
)) {
113 unsigned loc
= var
->data
.location
- VARYING_SLOT_VAR0
;
114 unsigned frac
= var
->data
.location_frac
;
115 old_vars
[loc
][frac
] = var
;
119 bool merged_any_vars
= false;
121 /* We don't handle combining vars of different type e.g. different array
124 for (unsigned loc
= 0; loc
< MAX_VARYINGS_INCL_PATCH
; loc
++) {
127 nir_variable
*first_var
= old_vars
[loc
][frac
];
134 bool found_merge
= false;
137 nir_variable
*var
= old_vars
[loc
][frac
];
141 if (var
!= first_var
) {
142 if (!variables_can_merge(shader
, first_var
, var
))
148 const unsigned num_components
=
149 glsl_get_components(glsl_without_array(var
->type
));
151 /* We had better not have any overlapping vars */
152 for (unsigned i
= 1; i
< num_components
; i
++)
153 assert(old_vars
[loc
][frac
+ i
] == NULL
);
155 frac
+= num_components
;
161 merged_any_vars
= true;
163 nir_variable
*var
= nir_variable_clone(old_vars
[loc
][first
], shader
);
164 var
->data
.location_frac
= first
;
165 var
->type
= resize_array_vec_type(var
->type
, frac
- first
);
167 nir_shader_add_variable(shader
, var
);
168 for (unsigned i
= first
; i
< frac
; i
++)
169 new_vars
[loc
][i
] = var
;
173 return merged_any_vars
;
176 static nir_deref_instr
*
177 build_array_deref_of_new_var(nir_builder
*b
, nir_variable
*new_var
,
178 nir_deref_instr
*leader
)
180 if (leader
->deref_type
== nir_deref_type_var
)
181 return nir_build_deref_var(b
, new_var
);
183 nir_deref_instr
*parent
=
184 build_array_deref_of_new_var(b
, new_var
, nir_deref_instr_parent(leader
));
186 return nir_build_deref_follower(b
, parent
, leader
);
190 nir_lower_io_to_vector_impl(nir_function_impl
*impl
, nir_variable_mode modes
)
192 assert(!(modes
& ~(nir_var_shader_in
| nir_var_shader_out
)));
195 nir_builder_init(&b
, impl
);
197 nir_metadata_require(impl
, nir_metadata_dominance
);
199 nir_shader
*shader
= impl
->function
->shader
;
200 nir_variable
*old_inputs
[MAX_VARYINGS_INCL_PATCH
][4] = {{0}};
201 nir_variable
*new_inputs
[MAX_VARYINGS_INCL_PATCH
][4] = {{0}};
202 nir_variable
*old_outputs
[MAX_VARYINGS_INCL_PATCH
][4] = {{0}};
203 nir_variable
*new_outputs
[MAX_VARYINGS_INCL_PATCH
][4] = {{0}};
205 if (modes
& nir_var_shader_in
) {
206 /* Vertex shaders support overlapping inputs. We don't do those */
207 assert(b
.shader
->info
.stage
!= MESA_SHADER_VERTEX
);
209 /* If we don't actually merge any variables, remove that bit from modes
210 * so we don't bother doing extra non-work.
212 if (!create_new_io_vars(shader
, &shader
->inputs
,
213 old_inputs
, new_inputs
))
214 modes
&= ~nir_var_shader_in
;
217 if (modes
& nir_var_shader_out
) {
218 /* Fragment shader outputs are always vec4. You shouldn't have
219 * scalarized them and it doesn't make sense to vectorize them.
221 assert(b
.shader
->info
.stage
!= MESA_SHADER_FRAGMENT
);
223 /* If we don't actually merge any variables, remove that bit from modes
224 * so we don't bother doing extra non-work.
226 if (!create_new_io_vars(shader
, &shader
->outputs
,
227 old_outputs
, new_outputs
))
228 modes
&= ~nir_var_shader_out
;
234 bool progress
= false;
236 /* Actually lower all the IO load/store intrinsics. Load instructions are
237 * lowered to a vector load and an ALU instruction to grab the channels we
238 * want. Outputs are lowered to a write-masked store of the vector output.
239 * For non-TCS outputs, we then run nir_lower_io_to_temporaries at the end
240 * to clean up the partial writes.
242 nir_foreach_block(block
, impl
) {
243 nir_foreach_instr_safe(instr
, block
) {
244 if (instr
->type
!= nir_instr_type_intrinsic
)
247 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
249 switch (intrin
->intrinsic
) {
250 case nir_intrinsic_load_deref
:
251 case nir_intrinsic_interp_deref_at_centroid
:
252 case nir_intrinsic_interp_deref_at_sample
:
253 case nir_intrinsic_interp_deref_at_offset
: {
254 nir_deref_instr
*old_deref
= nir_src_as_deref(intrin
->src
[0]);
255 if (!(old_deref
->mode
& modes
))
258 if (old_deref
->mode
== nir_var_shader_out
)
259 assert(b
.shader
->info
.stage
== MESA_SHADER_TESS_CTRL
);
261 nir_variable
*old_var
= nir_deref_instr_get_variable(old_deref
);
262 if (old_var
->data
.location
< VARYING_SLOT_VAR0
)
265 const unsigned loc
= old_var
->data
.location
- VARYING_SLOT_VAR0
;
266 const unsigned old_frac
= old_var
->data
.location_frac
;
267 nir_variable
*new_var
= old_deref
->mode
== nir_var_shader_in
?
268 new_inputs
[loc
][old_frac
] :
269 new_outputs
[loc
][old_frac
];
273 assert(new_var
->data
.location
== VARYING_SLOT_VAR0
+ loc
);
274 const unsigned new_frac
= new_var
->data
.location_frac
;
276 nir_component_mask_t vec4_comp_mask
=
277 ((1 << intrin
->num_components
) - 1) << old_frac
;
279 b
.cursor
= nir_before_instr(&intrin
->instr
);
281 /* Rewrite the load to use the new variable and only select a
282 * portion of the result.
284 nir_deref_instr
*new_deref
=
285 build_array_deref_of_new_var(&b
, new_var
, old_deref
);
286 assert(glsl_type_is_vector(new_deref
->type
));
287 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[0],
288 nir_src_for_ssa(&new_deref
->dest
.ssa
));
290 intrin
->num_components
=
291 glsl_get_components(new_deref
->type
);
292 intrin
->dest
.ssa
.num_components
= intrin
->num_components
;
294 b
.cursor
= nir_after_instr(&intrin
->instr
);
296 nir_ssa_def
*new_vec
= nir_channels(&b
, &intrin
->dest
.ssa
,
297 vec4_comp_mask
>> new_frac
);
298 nir_ssa_def_rewrite_uses_after(&intrin
->dest
.ssa
,
299 nir_src_for_ssa(new_vec
),
300 new_vec
->parent_instr
);
306 case nir_intrinsic_store_deref
: {
307 nir_deref_instr
*old_deref
= nir_src_as_deref(intrin
->src
[0]);
308 if (old_deref
->mode
!= nir_var_shader_out
)
311 nir_variable
*old_var
= nir_deref_instr_get_variable(old_deref
);
312 if (old_var
->data
.location
< VARYING_SLOT_VAR0
)
315 const unsigned loc
= old_var
->data
.location
- VARYING_SLOT_VAR0
;
316 const unsigned old_frac
= old_var
->data
.location_frac
;
317 nir_variable
*new_var
= new_outputs
[loc
][old_frac
];
321 assert(new_var
->data
.location
== VARYING_SLOT_VAR0
+ loc
);
322 const unsigned new_frac
= new_var
->data
.location_frac
;
324 b
.cursor
= nir_before_instr(&intrin
->instr
);
326 /* Rewrite the store to be a masked store to the new variable */
327 nir_deref_instr
*new_deref
=
328 build_array_deref_of_new_var(&b
, new_var
, old_deref
);
329 assert(glsl_type_is_vector(new_deref
->type
));
330 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[0],
331 nir_src_for_ssa(&new_deref
->dest
.ssa
));
333 intrin
->num_components
=
334 glsl_get_components(new_deref
->type
);
336 nir_component_mask_t old_wrmask
= nir_intrinsic_write_mask(intrin
);
338 assert(intrin
->src
[1].is_ssa
);
339 nir_ssa_def
*old_value
= intrin
->src
[1].ssa
;
340 nir_ssa_def
*comps
[4];
341 for (unsigned c
= 0; c
< intrin
->num_components
; c
++) {
342 if (new_frac
+ c
>= old_frac
&&
343 (old_wrmask
& 1 << (new_frac
+ c
- old_frac
))) {
344 comps
[c
] = nir_channel(&b
, old_value
,
345 new_frac
+ c
- old_frac
);
347 comps
[c
] = nir_ssa_undef(&b
, old_value
->num_components
,
348 old_value
->bit_size
);
351 nir_ssa_def
*new_value
= nir_vec(&b
, comps
, intrin
->num_components
);
352 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[1],
353 nir_src_for_ssa(new_value
));
355 nir_intrinsic_set_write_mask(intrin
,
356 old_wrmask
<< (old_frac
- new_frac
));
369 nir_metadata_preserve(impl
, nir_metadata_block_index
|
370 nir_metadata_dominance
);
377 nir_lower_io_to_vector(nir_shader
*shader
, nir_variable_mode modes
)
379 bool progress
= false;
381 nir_foreach_function(function
, shader
) {
383 progress
|= nir_lower_io_to_vector_impl(function
->impl
, modes
);